/usr/local/CPAN/Image-Grab/Image/Grab.pm


package Image::Grab;

use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $AUTOLOAD);

# $Id: Grab.pm,v 1.6 2002/01/19 21:14:01 mah Exp $
$VERSION = '1.4.2';

use Carp;
use Config;
require HTTP::Request;
require HTML::TreeBuilder;
require URI::URL;
require Image::Grab::RequestAgent;
use POSIX qw(strftime);

require Exporter;

@ISA = qw(Exporter);
@EXPORT_OK = qw(
  &expand_url &grab
);

# %fields, new, AUTOLOAD are from perltoot

my %fields = (
	      cookiefile => undef,
	      cookiejar  => undef,
	      date       => undef,
	      image      => undef,
	      "index"    => undef,
	      md5        => undef,
	      refer      => undef,
	      regexp     => undef,
	      type       => undef,
	      ua         => undef,
	      url        => undef,
	      search_url => undef,
	      debug      => undef,
	      do_posix   => ($Config{patchlevel} && $Config{patchlevel} >= 5 and
			     $Config{baserev} && $Config{baserev}    >= 5) ? 1 : undef,
	     );

sub DESTROY {}

sub new {
  my $that  = shift;
  my $class = ref($that) || $that;
  my $self = {
	      _permitted => \%fields,
	      %fields,
	     };

  if(@_) {
    my %arg = @_;

    foreach (keys %arg) {
      croak "Can't access `$_' field"
	unless exists $self->{_permitted}->{lc($_)};
      $self->{lc($_)} = $arg{$_};
    }
  }

  bless ($self, $class);
  $self->ua(new Image::Grab::RequestAgent);
  $self->{have_DigestMD5} = eval {require Digest::MD5};
  $self->{have_MD5} = eval {require MD5;};
  $self->{have_magick} = eval {require Image::Magick;};
  return $self;
}

sub AUTOLOAD {
  my $self = shift;
  my $type = ref($self)
    or croak "$self is not an object";

  my $name = $AUTOLOAD;
  $name =~ s/.*://;

  unless (exists $self->{_permitted}->{$name} ) {
    croak "Can't access `$name' field in class $type";
  }

  if(@_) {
    my $val = shift;
    carp "$name: $val" if $self->debug;
    return $self->{$name} = $val;
  } elsif (defined $self->{$name}) {
    return $self->{$name};
  }

  return undef;

}

# Accessor functions that we have to write.
sub realm {
  my $self = shift;
  my $type = ref($self)
    or croak "$self is not an object";

  if($#_ == 2){
    $self->ua->register_realm(shift, shift, shift);
    return 1;
  } 

  croak "usage: realm(\$realm, \$user, \$pass)";
}

sub getAllURLs {
  my $self = shift;
  my $type = ref($self)
    or croak "$self is not an object";
  my $times = (shift or 10);
  my $req;
  my $count = 0;
  my @link;
  my @now;

  # Need to load Cookie Jar?
  $self->loadCookieJar;

  @now = localtime;
  $self->search_url(strftime $self->search_url, @now) 
    if defined $self->search_url and defined $self->do_posix;
  croak "Need to specify a search_url!" if !defined $self->search_url;
  $req = $self->ua->request(new HTTP::Request 'GET', $self->search_url);

  # Try $times until successful
  while( (!$req->is_success) && $count < $times){
    $req = $self->ua->request(new HTTP::Request 'GET', $self->search_url);
    $count = $count + 1;
  }

  # return failure if we couldn't connect within $times tries
  if($count == $times && !$req->is_success){
    return undef;
  }

  # Get the base url
  my $base_url = $req->base;

  # Get the img tags out of the document.
  my $parser = new HTML::TreeBuilder;
  $parser->parse($req->content);
  $parser->eof;
  foreach (@{$parser->extract_links(qw(img td body))}) {
    push @link, URI::URL::url($$_[0])->abs($base_url)->as_string;
  }
  $parser->delete;

  return @link;
}

sub getRealURL {
  my $self = shift;
  my $type = ref($self)
    or croak "$self is not an object";
  my $times = (shift or 10);

  carp "getRealURL has been deprecated.  Use expand_url.";
  $self->expand_url(@_);
}

sub expand_url {
  my $self = shift;
  my $type = ref($self)
    or croak "$self is not an object";
  my $times = (shift or 10);
  my $req;
  my $count = 0;
  my @link;
  my @now;

  # Expand any POSIX time escapes
  @now = localtime;

  if(defined $self->url) {
    $self->url(strftime($self->url, @now)) 
      if defined $self->do_posix;
    return $self->url;
  }
  $self->regexp(strftime($self->regexp, @now))
    if defined $self->regexp and defined $self->do_posix;

  @link = $self->getAllURLs($times);
  return undef if !@link;

  # if this is a relative position tag...
  if($self->regexp || $self->index) {
    my (@match, $re);

    $self->refer($self->search_url);
    # set index to match first image
    $self->index(0) if !defined $self->index;
    $re = $self->regexp || '.';
    @match = grep {defined && /$re/} @link;
    # Return the nth
    return $match[$self->index]
      if @match;
  }

  # only if we fail.
  return undef;
}

sub loadCookieJar {
  my $self = shift;
  my $type = ref($self)
    or croak "$self is not an object";

  # need to do CookieJar initialization?
  if($self->cookiefile and !-f $self->cookiefile){
    carp $self->cookiefile, " is not a file";
  } elsif ($self->cookiefile and !defined $self->cookiejar) {
    use HTTP::Cookies;

    $self->cookiejar(
      HTTP::Cookies::Netscape->new( File => $self->cookiefile,
				    AutoSave => 0,
				  ));
    $self->cookiejar->load();
  }

}

sub grab {
  my $self = shift;
  my $times = 1;

  if(ref($self)) {
    if(my $c = shift) {
      $times = $c;
    }
  } else {
    if($self eq __PACKAGE__) {
      $self = Image::Grab->new(@_);
    } else {
      $self = Image::Grab->new(lc $self, @_);
    }
  }
  my $req;
  my $count;
  my $rc;

  # need to do CookieJar initialization?
  $self->loadCookieJar;

  # need to find image on page?
  my $url = $self->expand_url($times);

  # make sure we have a url
  croak "Couldn't determine an absolute URL!\n" unless defined $url;
  carp "Fetching URL: ", $url if $self->debug;

  # Set it up
  $req = new HTTP::Request 'GET', $url;
  $req->push_header('Referer', $self->refer) if defined $self->refer;
  if($self->cookiejar){
    $self->cookiejar->add_cookie_header($req);
  }

  # Knock it down
  $count = 0;
  do{
    $count++;
    $rc = $self->ua->request($req);
    carp "Got: ", $rc->content
      if $self->debug;
  } while($count <= $times and not $rc->is_success);

  # Did we fail?
  return 0 unless $rc->is_success;

  carp "Message: ", $rc->message if $self->debug;

  # save what we got
  $self->image($rc->content);
  $self->date($rc->last_modified);

  if($self->{have_DigestMD5}) {
    $self->md5(Digest::MD5::md5_hex($self->image));
  } elsif ($self->{have_MD5}) {
    $self->md5(MD5->hexhash($self->image));
  }


  $self->type($rc->content_type);

  $self->image;
}

sub grab_new {
  my $self = shift;
  my $type = ref($self)
    or croak "$self is not an object";
  my $tries = shift || 10;

  return $self->grab($tries)
    unless defined $self->date || defined $self->md5;

  my $tmp = $type->new;
  $tmp->url($self->url);
  $tmp->search_url($self->search_url);
  $tmp->index($self->index);
  $tmp->regexp($self->regexp);
  $tmp->grab;

  my $grab_new = 1;

  $grab_new = 0
    if defined $self->date && $self->date >= $tmp->date;
  $grab_new = 0
    if defined $self->md5 && $self->md5 eq $tmp->md5;

  return $self->grab($tries)
    if $grab_new;
  return undef;
}

1;
__END__