WWW::RobotRules::DBIC - Persistent RobotRules which use DBIC.


WWW-RobotRules-DBIC documentation Contained in the WWW-RobotRules-DBIC distribution.

Index


Code Index:

NAME

Top

WWW::RobotRules::DBIC - Persistent RobotRules which use DBIC.

DESCRIPTION

Top

WWW::RobotRules::DBIC is a subclass of WWW::RobotRules, which use DBIx::Class to store robots.txt info to any RDBMS.

SYNOPSIS

Top

    use WWW::RobotRules::DBIC;
    use LWP::RobotUA;

    my $rules = WWW::RobotRules::DBIC->new('dbi:mysql:robot_rules', 'root', '', \%options);
    my $ua = LWP::RobotUA->new(
       agent => 'YourRobot/1.0',
       from => 'you@example.com',
       rules => $rules,
    );

AUTHOR

Top

Tomohiro IKEBE, <ikebe@shebang.jp>

SEE ALSO

Top

WWW::RobotRules DBIx::Class

COPYRIGHT & LICENSE

Top


WWW-RobotRules-DBIC documentation Contained in the WWW-RobotRules-DBIC distribution.

package WWW::RobotRules::DBIC;

use strict;
use base qw(WWW::RobotRules);
use WWW::RobotRules::DBIC::Schema;
use DateTime;
use DateTime::Format::Strptime;

our $VERSION = '0.01';

sub new {
    my($class, @connect_info) = @_;
    my $self = bless {
        schema => WWW::RobotRules::DBIC::Schema->connect(@connect_info),
    }, $class;
    $self;
}

sub agent {
    my($self, $agent) = @_;
    my $old = $self->{agent};
    if (defined $agent && (!$old || $agent ne $old)) {
        $agent =~ s|/.*$||; # remove version number.
        my $new = $self->{schema}->resultset('UserAgent')->find_or_create({
            name => $agent,
        });
        $self->{agent} = $agent;
        $self->{agent_id} = $new->id;
        $self->{_netloc} = undef;
    }
    $old;
}

sub visit {
    my($self, $netloc, $time) = @_;
    return unless $netloc;
    $time ||= time;
    my $datetime = epoch2datetime($time);
    my $old = $self->_find_netloc($netloc);
    if ($old) {
        my $count = $old->count + 1;
        $old->count($count);
        $old->visited_on($datetime);
        $old->update;
    }
    else {
        $self->{schema}->resultset('Netloc')->create({
            user_agent_id => $self->{agent_id},
            netloc => $netloc,
            count => 1,
            visited_on => $datetime,
        });
    }
}

sub no_visits {
    my($self, $netloc) = @_;
    $netloc = $self->_find_netloc($netloc);
    return 0 unless $netloc;
    return $netloc->count;
}

sub last_visit {
    my($self, $netloc) = @_;
    $netloc = $self->_find_netloc($netloc);
    return unless $netloc && $netloc->visited_on;
    return datetime2epoch($netloc->visited_on);
}

sub fresh_until {
    my ($self, $netloc, $fresh) = @_;
    $netloc = $self->_find_netloc($netloc, 1);
    my $old = $netloc->fresh_until;
    if (defined $fresh) {
        my $datetime = epoch2datetime($fresh);
        $netloc->fresh_until($datetime);
        $netloc->update;
    }
    return datetime2epoch($old) if $old;
}

sub push_rules {
    my($self, $netloc, @rules) = @_;
    $netloc = $self->_find_netloc($netloc, 1);
    for my $rule(@rules) {
        $self->{schema}->resultset('Rule')->create({
            rule => $rule,
            netloc_id => $netloc->id,
        });
    }
}

sub clear_rules {
    my($self, $netloc) = @_;
    $netloc = $self->_find_netloc($netloc);
    if ($netloc) {
        $self->{schema}->resultset('Rule')->search({netloc_id => $netloc->id})->delete;
    }
}

sub rules {
    my($self, $netloc) = @_;
    my @rules = $self->{schema}->resultset('Rule')->search({
        'netloc.netloc' => $netloc,
        'netloc.user_agent_id' => $self->{agent_id},
    }, {
        join => [qw(netloc)],
    });
    return map { $_->rule } @rules;
}

sub dump {}

sub _find_netloc {
    my($self, $netloc, $create) = @_;
    my $old = $self->{_netloc};
    if ($old && $old->netloc eq $netloc) {
        return $old;
    }
    my $obj = $self->{schema}->resultset('Netloc')->find({
        netloc => $netloc,
        user_agent_id => $self->{agent_id},
    });
    if (!$obj && $create) {
        $obj = $self->{schema}->resultset('Netloc')->create({
            netloc => $netloc,
            user_agent_id => $self->{agent_id},
            count => '0',
        });
    }
    $self->{_netloc} = $obj;
    $obj;
}

sub datetime2epoch {
    my $str = shift;
    return unless $str;
    my $format = DateTime::Format::Strptime->new(
        pattern     => '%Y-%m-%d %H:%M:%S',
        time_zone   => 'local',
    );
    my $dt = $format->parse_datetime($str);
    return $dt->epoch if $dt;
}

sub epoch2datetime {
    my $epoch = shift;
    return unless $epoch;
    my $dt = DateTime->from_epoch(epoch => $epoch);
    $dt->set_time_zone('local');
    return $dt->strftime('%Y-%m-%d %H:%M:%S');
}

1;

__END__