/usr/local/CPAN/Image-Grab/Image/Grab.pm
package Image::Grab;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $AUTOLOAD);
# $Id: Grab.pm,v 1.6 2002/01/19 21:14:01 mah Exp $
$VERSION = '1.4.2';
use Carp;
use Config;
require HTTP::Request;
require HTML::TreeBuilder;
require URI::URL;
require Image::Grab::RequestAgent;
use POSIX qw(strftime);
require Exporter;
@ISA = qw(Exporter);
@EXPORT_OK = qw(
&expand_url &grab
);
# %fields, new, AUTOLOAD are from perltoot
my %fields = (
cookiefile => undef,
cookiejar => undef,
date => undef,
image => undef,
"index" => undef,
md5 => undef,
refer => undef,
regexp => undef,
type => undef,
ua => undef,
url => undef,
search_url => undef,
debug => undef,
do_posix => ($Config{patchlevel} && $Config{patchlevel} >= 5 and
$Config{baserev} && $Config{baserev} >= 5) ? 1 : undef,
);
sub DESTROY {}
sub new {
my $that = shift;
my $class = ref($that) || $that;
my $self = {
_permitted => \%fields,
%fields,
};
if(@_) {
my %arg = @_;
foreach (keys %arg) {
croak "Can't access `$_' field"
unless exists $self->{_permitted}->{lc($_)};
$self->{lc($_)} = $arg{$_};
}
}
bless ($self, $class);
$self->ua(new Image::Grab::RequestAgent);
$self->{have_DigestMD5} = eval {require Digest::MD5};
$self->{have_MD5} = eval {require MD5;};
$self->{have_magick} = eval {require Image::Magick;};
return $self;
}
sub AUTOLOAD {
my $self = shift;
my $type = ref($self)
or croak "$self is not an object";
my $name = $AUTOLOAD;
$name =~ s/.*://;
unless (exists $self->{_permitted}->{$name} ) {
croak "Can't access `$name' field in class $type";
}
if(@_) {
my $val = shift;
carp "$name: $val" if $self->debug;
return $self->{$name} = $val;
} elsif (defined $self->{$name}) {
return $self->{$name};
}
return undef;
}
# Accessor functions that we have to write.
sub realm {
my $self = shift;
my $type = ref($self)
or croak "$self is not an object";
if($#_ == 2){
$self->ua->register_realm(shift, shift, shift);
return 1;
}
croak "usage: realm(\$realm, \$user, \$pass)";
}
sub getAllURLs {
my $self = shift;
my $type = ref($self)
or croak "$self is not an object";
my $times = (shift or 10);
my $req;
my $count = 0;
my @link;
my @now;
# Need to load Cookie Jar?
$self->loadCookieJar;
@now = localtime;
$self->search_url(strftime $self->search_url, @now)
if defined $self->search_url and defined $self->do_posix;
croak "Need to specify a search_url!" if !defined $self->search_url;
$req = $self->ua->request(new HTTP::Request 'GET', $self->search_url);
# Try $times until successful
while( (!$req->is_success) && $count < $times){
$req = $self->ua->request(new HTTP::Request 'GET', $self->search_url);
$count = $count + 1;
}
# return failure if we couldn't connect within $times tries
if($count == $times && !$req->is_success){
return undef;
}
# Get the base url
my $base_url = $req->base;
# Get the img tags out of the document.
my $parser = new HTML::TreeBuilder;
$parser->parse($req->content);
$parser->eof;
foreach (@{$parser->extract_links(qw(img td body))}) {
push @link, URI::URL::url($$_[0])->abs($base_url)->as_string;
}
$parser->delete;
return @link;
}
sub getRealURL {
my $self = shift;
my $type = ref($self)
or croak "$self is not an object";
my $times = (shift or 10);
carp "getRealURL has been deprecated. Use expand_url.";
$self->expand_url(@_);
}
sub expand_url {
my $self = shift;
my $type = ref($self)
or croak "$self is not an object";
my $times = (shift or 10);
my $req;
my $count = 0;
my @link;
my @now;
# Expand any POSIX time escapes
@now = localtime;
if(defined $self->url) {
$self->url(strftime($self->url, @now))
if defined $self->do_posix;
return $self->url;
}
$self->regexp(strftime($self->regexp, @now))
if defined $self->regexp and defined $self->do_posix;
@link = $self->getAllURLs($times);
return undef if !@link;
# if this is a relative position tag...
if($self->regexp || $self->index) {
my (@match, $re);
$self->refer($self->search_url);
# set index to match first image
$self->index(0) if !defined $self->index;
$re = $self->regexp || '.';
@match = grep {defined && /$re/} @link;
# Return the nth
return $match[$self->index]
if @match;
}
# only if we fail.
return undef;
}
sub loadCookieJar {
my $self = shift;
my $type = ref($self)
or croak "$self is not an object";
# need to do CookieJar initialization?
if($self->cookiefile and !-f $self->cookiefile){
carp $self->cookiefile, " is not a file";
} elsif ($self->cookiefile and !defined $self->cookiejar) {
use HTTP::Cookies;
$self->cookiejar(
HTTP::Cookies::Netscape->new( File => $self->cookiefile,
AutoSave => 0,
));
$self->cookiejar->load();
}
}
sub grab {
my $self = shift;
my $times = 1;
if(ref($self)) {
if(my $c = shift) {
$times = $c;
}
} else {
if($self eq __PACKAGE__) {
$self = Image::Grab->new(@_);
} else {
$self = Image::Grab->new(lc $self, @_);
}
}
my $req;
my $count;
my $rc;
# need to do CookieJar initialization?
$self->loadCookieJar;
# need to find image on page?
my $url = $self->expand_url($times);
# make sure we have a url
croak "Couldn't determine an absolute URL!\n" unless defined $url;
carp "Fetching URL: ", $url if $self->debug;
# Set it up
$req = new HTTP::Request 'GET', $url;
$req->push_header('Referer', $self->refer) if defined $self->refer;
if($self->cookiejar){
$self->cookiejar->add_cookie_header($req);
}
# Knock it down
$count = 0;
do{
$count++;
$rc = $self->ua->request($req);
carp "Got: ", $rc->content
if $self->debug;
} while($count <= $times and not $rc->is_success);
# Did we fail?
return 0 unless $rc->is_success;
carp "Message: ", $rc->message if $self->debug;
# save what we got
$self->image($rc->content);
$self->date($rc->last_modified);
if($self->{have_DigestMD5}) {
$self->md5(Digest::MD5::md5_hex($self->image));
} elsif ($self->{have_MD5}) {
$self->md5(MD5->hexhash($self->image));
}
$self->type($rc->content_type);
$self->image;
}
sub grab_new {
my $self = shift;
my $type = ref($self)
or croak "$self is not an object";
my $tries = shift || 10;
return $self->grab($tries)
unless defined $self->date || defined $self->md5;
my $tmp = $type->new;
$tmp->url($self->url);
$tmp->search_url($self->search_url);
$tmp->index($self->index);
$tmp->regexp($self->regexp);
$tmp->grab;
my $grab_new = 1;
$grab_new = 0
if defined $self->date && $self->date >= $tmp->date;
$grab_new = 0
if defined $self->md5 && $self->md5 eq $tmp->md5;
return $self->grab($tries)
if $grab_new;
return undef;
}
1;
__END__