WWW::RobotRules::Memcache - Use memcached in conjunction with WWW::RobotRules


WWW-RobotRules-Memcache documentation Contained in the WWW-RobotRules-Memcache distribution.

Index


Code Index:

NAME

Top

WWW::RobotRules::Memcache - Use memcached in conjunction with WWW::RobotRules

SYNOPSIS

Top

  use WWW::RobotRules::Memcache;

  my @memcache_servers = ('localhost:11211', '192.168.100.3:11211');

  my $rules = WWW::RobotRules::Memcache->new(@memcache_servers);
  my $ua = WWW::RobotUA->new('my-robot/1.0', 'me@foo.com', $rules);

  # Then just use $ua as usual
  $res = $ua->request($req);

DESCRIPTION

Top

This is a subclass of WWW::RobotRules that uses Cache::Memcache to implement persistent caching of robots.txt and host visit information.

FUNCTIONS

Top

new(server [, server ..])

When creating this object you must pass at least one memcache server.

AUTHOR

Top

Nick Gerakines, <nick at gerakines.net>

BUGS

Top

Please report any bugs or feature requests to bug-www-robotrules-cache at rt.cpan.org, or through the web interface at http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-RobotRules-Cache. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.

SUPPORT

Top

You can find documentation for this module with the perldoc command.

    perldoc WWW::RobotRules::Cache

You can also look for information at:

* AnnoCPAN: Annotated CPAN documentation

http://annocpan.org/dist/WWW-RobotRules-Cache

* CPAN Ratings

http://cpanratings.perl.org/d/WWW-RobotRules-Cache

* RT: CPAN's request tracker

http://rt.cpan.org/NoAuth/Bugs.html?Dist=WWW-RobotRules-Cache

* Search CPAN

http://search.cpan.org/dist/WWW-RobotRules-Cache

COPYRIGHT & LICENSE

Top


WWW-RobotRules-Memcache documentation Contained in the WWW-RobotRules-Memcache distribution.

package WWW::RobotRules::Memcache;

use strict;
use warnings;

use base 'WWW::RobotRules';

use Cache::Memcached;
use Carp;

our $VERSION = '0.1';

sub new  { 
    my ($class, @mem_nodes) = @_;
    if (! @mem_nodes) {
        Carp::croak('WWW::RobotRules::Memcache servers required')
    }
    my $self = bless { }, $class;
	$self->{'memd'} = Cache::Memcached->new({
        'servers' => [ @mem_nodes ],
    });
    return $self;
}

sub no_visits {
    my ($self, $netloc) = @_;
    my $t = $self->{'memd'}->get("$netloc|vis");
    if (! $t) { return 0; }
    return ( split( /;\s*/, $t ) )[0];
}

sub last_visit {
    my ($self, $netloc) = @_;
    my $t = $self->{'memd'}->get("$netloc|vis");
    if (! $t) { return 0; }
    return ( split( /;\s*/, $t ) )[1];
}

sub fresh_until {
    my ($self, $netloc, $fresh) = @_;
    my $old = $self->{'memd'}->get("$netloc|exp");
    if ($old) {
        $old =~ s/;.*//;
    }
    if (defined $fresh) {
	    $fresh .= "; " . localtime($fresh);
	    $self->{'memd'}->set("$netloc|exp", $fresh);
    }
    return $old;
}

sub visit {
    my($self, $netloc, $time) = @_;
    $time ||= time;

    my $count = 0;
    my $old = $self->{'memd'}->get("$netloc|vis");
    if ($old) {
	    my $last;
	    ($count,$last) = split(/;\s*/, $old);
	    if ($last > $time) { $time = $last; }
    }
    $count++;
    $self->{'memd'}->set("$netloc|vis", "$count; $time; " . localtime($time));
    return 1;
}

sub push_rules {
    my($self, $netloc, @rules) = @_;
    my $cnt = 1;
    while ($self->{'memd'}->get("$netloc|r$cnt")) {
	    $cnt++;
    }
    foreach my $rule (@rules) {
        $self->{'memd'}->set("$netloc|r$cnt", $rule);
	    $cnt++;
    }
    return 1;
}

sub clear_rules {
    my ($self, $netloc) = @_;
    my $cnt = 1;
    while ($self->{'memd'}->get("$netloc|r$cnt")) {
	    $self->{'memd'}->delete("$netloc|r$cnt");
	    $cnt++;
    }
    return 1;
}

sub rules {
    my($self, $netloc) = @_;
    my @rules = ();
    my $cnt = 1;
    while (my $rule = $self->{'memd'}->get("$netloc|r$cnt")) {
        push @rules, $rule;
        $cnt++;
    }
    return @rules;
}

1;
__END__