| Gungho documentation | Contained in the Gungho distribution. |
Gungho::Component::RobotsMETA - Automatically Parse Robots META
components:
- RobotsMETA
This module automatically parses any text/html document for robots exclusion directives embedded in the document.
Initializes the component.
Overrides Gungho::Component::Core::handle_response()
| Gungho documentation | Contained in the Gungho distribution. |
# $Id: /mirror/gungho/lib/Gungho/Component/RobotsMETA.pm 31095 2007-11-26T00:05:40.329716Z lestrrat $
#
# Copyright (c) 2007 Daisuke Maki <daisuke@endeworks.jp>
# All rights reserved.

package Gungho::Component::RobotsMETA;
use strict;
use warnings;
use base qw(Gungho::Component);
use HTML::RobotsMETA;

# Class-data accessor (created through mk_classdata) that holds the
# HTML::RobotsMETA parser instance installed by setup().
__PACKAGE__->mk_classdata($_) for qw(robots_meta);

# setup(@args)
#
# Runs the next setup() in the method chain with the same arguments, then
# stores a freshly constructed HTML::RobotsMETA object in robots_meta.
sub setup {
    my $self = shift;
    $self->next::method(@_);
    $self->robots_meta( HTML::RobotsMETA->new );
}

# handle_response($req, $res)
#
# For a successful response whose content type starts with "text/html"
# (case-insensitive), parses the response content with the robots_meta parser
# and stores the resulting rules on the response under the "robots_meta" note.
# A parse failure is logged at debug level and otherwise ignored, so the
# response still continues down the handler chain.
#
# Returns whatever the next handle_response() in the method chain returns.
sub handle_response {
    my ($self, $req, $res) = @_;

    if ($res->is_success && $res->content_type =~ m{^text/html}i) {
        # The trailing "1" makes the eval return true on success, so a failure
        # is detected even when $@ has been cleared or is false by the time we
        # look at it.
        my $parsed = eval {
            my $rules = $self->robots_meta->parse_rules( $res->content );
            $res->notes( robots_meta => $rules );
            1;
        };

        if (! $parsed) {
            # Copy $@ first: it is a global and any later eval may clobber it.
            my $error = $@ || 'unknown error';

            # Prefer the request attached to the response (as before), but
            # fall back to the request we were handed so that a response
            # without an attached request cannot make the logging itself die
            # and prevent the rest of the chain from running.
            my $origin = $res->request || $req;
            my $uri    = $origin ? $origin->uri : '(unknown URI)';

            $self->log->debug("Failed to parse $uri for robots META information: $error");
        }
    }

    return $self->next::method($req, $res);
}

1;

__END__