| WWW-Google-Images documentation | Contained in the WWW-Google-Images distribution. |
WWW::Google::Images - Google Images Agent
Version 0.6.5
This module may be used to search for images on Google. Its interface is heavily inspired by WWW::Google::Groups.
use WWW::Google::Images;
$agent = WWW::Google::Images->new(
server => 'images.google.com',
proxy => 'my.proxy.server:port',
);
$result = $agent->search('flowers', limit => 10);
while ($image = $result->next()) {
$count++;
print $image->content_url();
print $image->context_url();
print $image->save_content(base => 'image' . $count);
print $image->save_context(base => 'page' . $count);
}
Creates and returns a new WWW::Google::Images object.
Optional parameters:
use $server as server.
use $proxy as proxy on port $port.
Perform a search for $query, and return a WWW::Google::Images::SearchResult object.
Optional parameters:
limit the maximum number of result returned to $limit.
limit the minimum width of result returned to $width pixels.
limit the minimum height of result returned to $height pixels.
limit the minimum size of result returned to $size KB.
limit the maximum width of result returned to $width pixels.
limit the maximum height of result returned to $height pixels.
limit the maximum size of result returned to $size KB.
limit the width/height ratio of result returned to $ratio (+/- tolerance).
set the tolerance limit for the ratio limit to $ratio_delta (default: 1.0).
limit the result returned to those whose filename matches case-sensitive $regex regular expression.
limit the result returned to those whose filename matches case-insensitive $regex regular expression.
Copyright (C) 2004-2006, INRIA.
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
Guillaume Rousse <grousse@cpan.org>
| WWW-Google-Images documentation | Contained in the WWW-Google-Images distribution. |
# $Id: Images.pm,v 1.31 2007/12/29 01:09:12 rousse Exp $
package WWW::Google::Images;

# Strictures come first so they cover everything that follows.
use strict;
use warnings;

use WWW::Mechanize;
use WWW::Google::Images::SearchResult;
use HTML::Parser;

our $VERSION = '0.6.5';
# Constructor: builds the agent object used to query Google Images.
#
# Named arguments (all optional):
#   server - host or URL of the image search server
#            (defaults to 'http://images.google.com/')
#   proxy  - host:port or URL of an HTTP proxy to route requests through
#
# A value given without a scheme gets 'http://' prepended.
# Returns the blessed object, which holds the normalised server, the
# normalised proxy and the underlying WWW::Mechanize instance.
sub new {
    my ($class, %arg) = @_;

    # Normalise bare host names into full URLs.
    for my $name ('server', 'proxy') {
        my $value = $arg{$name};
        next if !$value;
        next if $value =~ m,^\w+?://,o;
        $arg{$name} = 'http://' . $value;
    }

    # Mechanize is created with its warning and error callbacks unset.
    my $mech = WWW::Mechanize->new(onwarn => undef, onerror => undef);
    if ($arg{proxy}) {
        $mech->proxy(['http'], $arg{proxy});
    }

    my %self = (
        _server => $arg{server} || 'http://images.google.com/',
        _proxy  => $arg{proxy},
        _agent  => $mech,
    );

    return bless \%self, $class;
}
# Run an image search for $query and return a
# WWW::Google::Images::SearchResult built from the collected images.
#
# Arguments:
#   $query - search terms; a false value aborts with a warning and
#            returns nothing
#   %arg   - optional filters, passed through to _extract_images():
#            limit (defaults to 10; 0 disables the cap), min_size,
#            max_size, min_width, max_width, min_height, max_height,
#            ratio, ratio_delta, regex, iregex
#
# Result pages are walked one after another until the limit is reached
# or no further page link can be followed.
sub search {
    my ($self, $query, %arg) = @_;

    unless ($query) {
        warn "No query given, aborting";
        return;
    }

    $arg{limit} = 10 unless defined $arg{limit};

    $self->{_agent}->get($self->{_server});
    $self->{_agent}->submit_form(
        form_number => 1,
        fields      => { q => $query },
    );

    my @images;
    my $page = 1;
    PAGE: while (1) {
        # Only ask for as many images as are still missing (0 = no cap).
        my $remaining = $arg{limit} ? $arg{limit} - @images : 0;
        push @images, $self->_extract_images($remaining, %arg);

        last PAGE if $arg{limit} && @images >= $arg{limit};
        last PAGE unless $self->_next_page(++$page);
    }

    return WWW::Google::Images::SearchResult->new($self->{_agent}, @images);
}

# Follow the link whose text is the page number $page.
# Returns whatever the agent's follow_link() returns; search() stops
# paging when that value is false.
sub _next_page {
    my ($self, $page) = @_;

    return $self->{_agent}->follow_link(text => $page);
}

# Extract the images of the page currently loaded in the agent.
#
# Arguments:
#   $limit - maximum number of images to return (false = no cap)
#   %arg   - the filters given to search()
#
# Returns a list of hash references with the keys 'content' (image URL)
# and 'context' (URL of the referring page), in page order.
sub _extract_images {
    my ($self, $limit, %arg) = @_;

    my @links = $self->{_agent}->find_all_links(url_regex => qr/imgurl/);

    # The size captions are only parsed when a filter needs them.
    my $needs_metadata = grep { $arg{$_} }
        qw(min_size max_size min_width max_width min_height max_height ratio);

    my @data;
    if ($needs_metadata) {
        my $parser = HTML::Parser->new();

        # Matches captions such as "640 x 480 - 35k" or "640 x 480 - 35k-jpg".
        my $pattern = qr/
            ^
            (\d+) \s x \s (\d+)
            \s - \s
            (\d+)k
            (?: - \w*)?
            $
        /x;

        my $callback = sub {
            my ($text) = @_;
            my ($width, $height, $size) = $text =~ $pattern
                or return;
            push @data, { width => $width, height => $height, size => $size };
            return;
        };

        $parser->handler(text => $callback, 'text');
        $parser->parse($self->{_agent}->content());
    }

    my ($lower, $upper);
    if ($arg{ratio}) {
        # An explicit tolerance of 0 is honoured; only an absent one
        # falls back to the default of 1.0.
        my $delta = defined $arg{ratio_delta} ? $arg{ratio_delta} : 1.0;
        $lower = $arg{ratio} - $delta;
        $upper = $arg{ratio} + $delta;
    }

    my @images;
    LINK: for my $i (0 .. $#links) {
        if ($needs_metadata) {
            # Captions are paired with links by position. A link with no
            # caption cannot be checked against the filters, so skip it.
            my $meta = $data[$i];
            next LINK unless $meta;

            next LINK if $arg{min_size}   && $meta->{size}   < $arg{min_size};
            next LINK if $arg{max_size}   && $meta->{size}   > $arg{max_size};
            next LINK if $arg{min_width}  && $meta->{width}  < $arg{min_width};
            next LINK if $arg{max_width}  && $meta->{width}  > $arg{max_width};
            next LINK if $arg{min_height} && $meta->{height} < $arg{min_height};
            next LINK if $arg{max_height} && $meta->{height} > $arg{max_height};

            if ($arg{ratio}) {
                # A zero height would make the ratio undefined.
                next LINK unless $meta->{height};
                my $ratio = $meta->{width} / $meta->{height};
                next LINK if $ratio < $lower || $ratio > $upper;
            }
        }

        # Take the captures from the match itself, so a link lacking
        # either parameter is skipped instead of yielding undef or
        # left-over values.
        my ($content, $context) =
            $links[$i]->url() =~ /imgurl=([^&]+)&imgrefurl=([^&]+)/
            or next LINK;

        next LINK if $arg{regex}  && $content !~ /$arg{regex}/;
        next LINK if $arg{iregex} && $content !~ /$arg{iregex}/i;

        push @images, { content => $content, context => $context };
        last LINK if $limit && @images == $limit;
    }

    return @images;
}
1;