URL::Grab - Perl extension for blah blah blah


URL-Grab documentation Contained in the URL-Grab distribution.

Index


Code Index:

NAME

Top

URL::Grab - Perl extension for blah blah blah

SYNOPSIS

Top

  use URL::Grab;
  my $cnt_hsh;

  # IMPORTANT note (see also #32434):
  # Please note, that URL::Grab doesn't return you the content itself as a
  # scalar, but instead, returns a hash-reference. The keys of the
  # hash are the URLs.

  $cnt_hsh = $urlgrabber->grab('http://google.at');

  # The content then is available in $cnt_hsh->{'http://google.at'}->{'http://google.at'}.
  # Sorry, this is a design issue, that cannot be changed any more :-)
  # If you are fetching only one URL, you would better use grab_single!

  $cnt_hsh = $urlgrabber->grab(qw(http://google.at));
  $cnt_hsh = $urlgrabber->grab([ qw(http://google.at http://asdf.org) ]);
  $cnt_hsh = $urlgrabber->grab([ qw(http://google.at http://asdf.org) ], 'http://perl.com');

  $cnt_hsh = $urlgrabber->grab_mirrorlist(
    'http://linux.duke.edu/projects/yum/',
    [qw(http://www.netfilter.org http://www.at.netfilter.org)]
  );

  # Please note, the following example will return only *one* hash-reference - it will use the
  # first that works!!!
  $cnt_hsh = $urlgrabber->grab_mirrorlist([qw(
     http://www.netfilter.org http://www.at.netfilter.org
  )]);

  $cnt_hsh = $urlgrabber->grab_mirrorlist([qw(
    ftp://linux-kernel.at/packages/yum.conf2
    http://filelister.linux-kernel.at/downloads/packages/yum.conf
  )]);

  $cnt_hsh = $urlgrabber->grab_mirrorlist(
    'ftp://linux-kernel.at/packages/yum.conf'
  );

  $cnt_hsh = $urlgrabber->grab_mirrorlist([qw(
    /etc/yum.conf
    ftp://linux-kernel.at/packages/yum.conf
  )]);

DESCRIPTION

Top

URL::Grab is a perl module that drastically simplifies the fetching of files from within a local source (eg. local filesystem) and/or remote sources (eg. http, ftp). It is designed to be used in programs that need common (but not necessarily simple) url-fetching features. It is extremely simple to drop into an existing program and provides a clean interface to protocol-independant file-access. Best of all, URL::Grab takes care of all those pesky file-fetching details, and lets you focus on whatever it is that your program is written to do!

EXPORT

None by default.

SEE ALSO

Top

LWP::UserAgent

Project mailinglist: http://lists.linux-kernel.at/wwsympa.fcgi/info/url-grab

Project website: http://projects.linux-kernel.at/URL-Grab/

AUTHOR

Top

Oliver Falk, <oliver@linux-kernel.at>

THANKS

Top

Gary Krueger <gkrueger@browsermedia.com> for pointing out some issues - #32434 - #32433

COPYRIGHT AND LICENSE

Top


URL-Grab documentation Contained in the URL-Grab distribution.

package URL::Grab;

use 5.008;
use strict;
use warnings;
require Carp;
require LWP::UserAgent;

use Carp qw/carp/;
use LWP::UserAgent;

require Exporter;

our @ISA = qw(Exporter);
our %EXPORT_TAGS = ( 'all' => [ qw() ] );

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our @EXPORT = qw();

(our $VERSION) = '$Revision: 1.4 $' =~ /([\d.]+)/;

sub new {
	my $class = shift;
	my $args = shift;
	my $self = { };

	$self->{retries} = $args->{retries} || 0;
	$self->{ua} = LWP::UserAgent->new(agent => "URL::Grab $VERSION");
	$self->{ua}->{timeout} = $args->{timeout} || 30;
	
	bless($self, $class);
	return $self;
}

sub grab_single {
	my $self = shift;
	my $url = shift;

	my $retval;
	if($url =~ /^https?:\/\// || $url =~ /^ftp:\/\//) {
		my $res = $self->{ua}->get($url);
		my $retries = 0;
		while($self->{retries} >= $retries) {
			if($res->is_success()) {
				return { $url => $res->content() };
			}
			$retries++;
		}
	} else {
		if($url =~ /^file:\/\// || $url =~ /^\//) {
			my $tmp_url = $url;
			$tmp_url =~ s/^file://;
			if(-f $tmp_url) {
				my $res;
				open(FH, $tmp_url);
				$res .= $_ while(<FH>);
				close(FH);
				return { $url => $res };
			} else {
				carp "No such file or directory";
			}
		} else {
			carp "Unknown transport protocol";
			return undef;
		}
	}
	return undef;
}

sub grab {
	my $self = shift;
	my @urls;
	while(my $arg = shift) {
		if(ref $arg eq 'ARRAY') { push @urls, $_ foreach(@{$arg});
		} else { push @urls, $arg; }
	}
	$self->{retval}->{$_} = $self->grab_single($_) foreach (@urls);
	return $self->{retval};
}

sub grab_failover {
	my $self = shift;
	my @urls;

	while (my $arg = shift) {
		push @urls, $arg;
	}

	foreach my $url (@urls) {
		my $content = $self->grab_single($url);
		return $content if $content;
	}
}

sub grab_mirrorlist {
	my $self = shift;
	my @urls;
	while(my $arg = shift) {
		push @urls, $arg;
	}
	foreach my $mirror (@urls) {
		if(ref $mirror eq 'SCALAR' || ref \$mirror eq 'SCALAR') {
			$self->{retval} = $self->grab_single($mirror)
		}
		$self->{retval} = $self->grab_failover(@{$mirror}) if ref $mirror eq 'ARRAY';
	}
	return $self->{retval};
}

1;
__END__