Gungho::Component::RobotsMETA - Automatically Parse Robots META


Gungho documentation Contained in the Gungho distribution.

Index


Code Index:

NAME

Top

Gungho::Component::RobotsMETA - Automatically Parse Robots META

SYNOPSIS

Top

  components:
    - RobotsMETA

DESCRIPTION

Top

This module automatically parses any text/html document for robots exclusion directies embedded in the document.

METHODS

Top

setup

Initializes the component.

handle_response

Overrides Gungho::Component::Core::handle_response()

SEE ALSO

Top

HTML::RobotsMETA


Gungho documentation Contained in the Gungho distribution.

# $Id: /mirror/gungho/lib/Gungho/Component/RobotsMETA.pm 31095 2007-11-26T00:05:40.329716Z lestrrat  $
#
# Copyright (c) 2007 Daisuke Maki <daisuke@endeworks.jp>
# All rights reserved.

package Gungho::Component::RobotsMETA;
use strict;
use warnings;
use base qw(Gungho::Component);
use HTML::RobotsMETA;

__PACKAGE__->mk_classdata($_) for qw(robots_meta);

sub setup
{
    my $self = shift;
    $self->next::method(@_);
    $self->robots_meta( HTML::RobotsMETA->new );
}

sub handle_response
{
    my ($self, $req, $res) = @_;

    if ($res->is_success && $res->content_type =~ m{^text/html}i) {
        eval {
            my $rules = $self->robots_meta->parse_rules( $res->content );
            $res->notes( robots_meta => $rules );
        };
        if ($@) {
            $self->log->debug("Failed to parse " . $res->request->uri . " for robots META information: $@");
        }
    }
    $self->next::method($req, $res);
}

1;

__END__