| URL-Grab documentation | Contained in the URL-Grab distribution. |
URL::Grab - Perl extension for blah blah blah
use URL::Grab;
my $cnt_hsh;
# IMPORTANT note (see also #32434):
# Please note, that URL::Grab doesn't return you the content itself as a
# scalar, but instead, returns a hash-reference. The keys of the
# hash are the URLs.
$cnt_hsh = $urlgrabber->grab('http://google.at');
# The content then is available in $cnt_hsh->{'http://google.at'}->{'http://google.at'}.
# Sorry, this is a design issue, that cannot be changed any more :-)
# If you are fetching only one URL, you would better use grab_single!
$cnt_hsh = $urlgrabber->grab(qw(http://google.at));
$cnt_hsh = $urlgrabber->grab([ qw(http://google.at http://asdf.org) ]);
$cnt_hsh = $urlgrabber->grab([ qw(http://google.at http://asdf.org) ], 'http://perl.com');
$cnt_hsh = $urlgrabber->grab_mirrorlist(
'http://linux.duke.edu/projects/yum/',
[qw(http://www.netfilter.org http://www.at.netfilter.org)]
);
# Please note, the following example will return only *one* hash-reference - it will use the
# first that works!!!
$cnt_hsh = $urlgrabber->grab_mirrorlist([qw(
http://www.netfilter.org http://www.at.netfilter.org
)]);
$cnt_hsh = $urlgrabber->grab_mirrorlist([qw(
ftp://linux-kernel.at/packages/yum.conf2
http://filelister.linux-kernel.at/downloads/packages/yum.conf
)]);
$cnt_hsh = $urlgrabber->grab_mirrorlist(
'ftp://linux-kernel.at/packages/yum.conf'
);
$cnt_hsh = $urlgrabber->grab_mirrorlist([qw(
/etc/yum.conf
ftp://linux-kernel.at/packages/yum.conf
)]);
URL::Grab is a perl module that drastically simplifies the fetching of files from within a local source (eg. local filesystem) and/or remote sources (eg. http, ftp). It is designed to be used in programs that need common (but not necessarily simple) url-fetching features. It is extremely simple to drop into an existing program and provides a clean interface to protocol-independant file-access. Best of all, URL::Grab takes care of all those pesky file-fetching details, and lets you focus on whatever it is that your program is written to do!
None by default.
LWP::UserAgent
Project mailinglist: http://lists.linux-kernel.at/wwsympa.fcgi/info/url-grab
Project website: http://projects.linux-kernel.at/URL-Grab/
Oliver Falk, <oliver@linux-kernel.at>
Gary Krueger <gkrueger@browsermedia.com> for pointing out some issues - #32434 - #32433
Copyright (C) 2006-2008 by Oliver Falk <oliver@linux-kernel.at>
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.0 or, at your option, any later version of Perl 5 you may have available.
| URL-Grab documentation | Contained in the URL-Grab distribution. |
package URL::Grab; use 5.008; use strict; use warnings; require Carp; require LWP::UserAgent; use Carp qw/carp/; use LWP::UserAgent; require Exporter; our @ISA = qw(Exporter); our %EXPORT_TAGS = ( 'all' => [ qw() ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw(); (our $VERSION) = '$Revision: 1.4 $' =~ /([\d.]+)/; sub new { my $class = shift; my $args = shift; my $self = { }; $self->{retries} = $args->{retries} || 0; $self->{ua} = LWP::UserAgent->new(agent => "URL::Grab $VERSION"); $self->{ua}->{timeout} = $args->{timeout} || 30; bless($self, $class); return $self; } sub grab_single { my $self = shift; my $url = shift; my $retval; if($url =~ /^https?:\/\// || $url =~ /^ftp:\/\//) { my $res = $self->{ua}->get($url); my $retries = 0; while($self->{retries} >= $retries) { if($res->is_success()) { return { $url => $res->content() }; } $retries++; } } else { if($url =~ /^file:\/\// || $url =~ /^\//) { my $tmp_url = $url; $tmp_url =~ s/^file://; if(-f $tmp_url) { my $res; open(FH, $tmp_url); $res .= $_ while(<FH>); close(FH); return { $url => $res }; } else { carp "No such file or directory"; } } else { carp "Unknown transport protocol"; return undef; } } return undef; } sub grab { my $self = shift; my @urls; while(my $arg = shift) { if(ref $arg eq 'ARRAY') { push @urls, $_ foreach(@{$arg}); } else { push @urls, $arg; } } $self->{retval}->{$_} = $self->grab_single($_) foreach (@urls); return $self->{retval}; } sub grab_failover { my $self = shift; my @urls; while (my $arg = shift) { push @urls, $arg; } foreach my $url (@urls) { my $content = $self->grab_single($url); return $content if $content; } } sub grab_mirrorlist { my $self = shift; my @urls; while(my $arg = shift) { push @urls, $arg; } foreach my $mirror (@urls) { if(ref $mirror eq 'SCALAR' || ref \$mirror eq 'SCALAR') { $self->{retval} = $self->grab_single($mirror) } $self->{retval} = $self->grab_failover(@{$mirror}) if ref $mirror eq 'ARRAY'; } return $self->{retval}; } 1; __END__