| LWP-Curl documentation | Contained in the LWP-Curl distribution. |
LWP::Curl - LWP methods implementation with Curl engine
Version 0.08
Use libcurl through an LWP-like interface: $lwpcurl->get($url), $lwpcurl->timeout(15). You don't have to care about the Curl API, and you don't have to care about encoding URLs yourself.
use LWP::Curl;
my $lwpcurl = LWP::Curl->new();
my $content = $lwpcurl->get('http://search.cpan.org','http://www.cpan.org');
# get the page http://search.cpan.org, passing http://www.cpan.org as the referer
Creates and returns a new LWP::Curl object, hereafter referred to as the "lwpcurl".
my $lwpcurl = LWP::Curl->new()
timeout => sec: Set the timeout value in seconds. The default timeout value is 180 seconds, i.e. 3 minutes.
headers => [0|1]: Include the HTTP headers in the returned content. The default is false ('0').
user_agent => 'agent86': Set the user agent string. The default is 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'.
followlocation => [0|1]: Whether the spider follows the redirect when it receives an HTTP 301 (Redirect). The default is 1.
auto_encode => [0|1]: Turn automatic encoding of URLs on or off, for get/post. The default is 1.
maxredirs => number: Set how many redirects the spider will follow when it receives an HTTP 301 (Redirect). The default is 3.
proxy => $proxyurl: Set the proxy in the constructor. $proxyurl looks like http://myproxy.com:3128/ or http://username:password@proxy.com:3128/
libcurl respects the environment variables http_proxy, ftp_proxy, all_proxy etc, if any of those are set. The $lwpcurl->proxy option does however override any possibly set environment variables.
Set how many redirects the spider will follow when it receives an HTTP 301 (Redirect). The default is 3.
Get the content of $url, passing $referer if defined
use LWP::Curl;
my $referer = 'http://www.example.com';
my $get_url = 'http://www.example.com/foo';
my $lwpcurl = LWP::Curl->new();
my $content = $lwpcurl->get($get_url, $referer);
=cut
POST the $hash_form fields in $url, passing $referer if defined
use LWP::Curl;
my $lwpcurl = LWP::Curl->new();
my $referer = 'http://www.examplesite.com/';
my $post_url = 'http://www.examplesite.com/post/';
my $hash_form = {
'field1' => 'value1',
'field2' => 'value2',
};
my $content = $lwpcurl->post($post_url, $hash_form, $referer);
Set timeout, default 180
Turn on/off auto_encode
(The following text is copied from L<WWW::Mechanize>.) Sets the user agent string to the expanded version from a table of actual user agent strings. I<$alias> can be one of the following: Windows IE 6, Windows Mozilla, Mac Safari, Mac Mozilla, Linux Mozilla, Linux Konqueror.
The user agent string will then be replaced with a more interesting one. For instance, $lwpcurl->agent_alias( 'Windows IE 6' ); sets your User-Agent to Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1). (The text copied from L<WWW::Mechanize> ends here, but the idea and the data structure are a copy too :)
Set the proxy in the constructor, $proxyurl will be like: http://myproxy.com:3128/ http://username:password@proxy.com:3128/
libcurl respects the environment variables http_proxy, ftp_proxy, all_proxy etc, if any of those are set. The $lwpcurl->proxy option does however override any possibly set environment variables.
To disable proxy set $lwpcurl->proxy('');
Calling $lwpcurl->proxy without an argument returns the current proxy.
This is a small list of features I plan to add. Feel free to contribute your wishlist and comments!
Lindolfo Rodrigues de Oliveira Neto, <lorn at cpan.org>
Please report any bugs or feature requests to bug-lwp-curl at rt.cpan.org, or through
the web interface at http://rt.cpan.org/NoAuth/ReportBug.html?Queue=LWP-Curl. I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
You can find documentation for this module with the perldoc command.
perldoc LWP::Curl
You can also look for information at:
Thanks to Breno G. Oliveira for the great tips. Thanks to LWP and WWW::Mechanize for the inspiration.
Copyright 2009 Lindolfo Rodrigues de Oliveira Neto, all rights reserved.
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| LWP-Curl documentation | Contained in the LWP-Curl distribution. |
# Package header for LWP::Curl: strictures plus the modules used by the
# methods in this file (WWW::Curl::Easy for transfers, Carp for croak,
# Data::Dumper for the constructor's debug dump, URI::Escape for uri_escape).
package LWP::Curl; use warnings; use strict; use WWW::Curl::Easy; use Carp qw(croak); use Data::Dumper; use URI::Escape;
# Version of this module.
our $VERSION = '0.08';
# Constructor: creates an LWP::Curl object wrapping a WWW::Curl::Easy handle.
#
# Options are given as key/value pairs (NOT as a hash reference):
#   timeout        => seconds passed to CURLOPT_TIMEOUT (default 180)
#   headers        => value for CURLOPT_HEADER (default 0)
#   user_agent     => User-Agent string (default: an MSIE 6.0 string)
#   maxredirs      => value for CURLOPT_MAXREDIRS (default 3); the older
#                     spelling "max_redirs" is still accepted
#   followlocation => value for CURLOPT_FOLLOWLOCATION (default 1)
#   auto_encode    => whether get/post run the URL through uri_escape
#                     (default 1)
#   proxy          => proxy URL; only handed to libcurl when true
#   debug          => when true, dumps the object to STDERR (default 0)
#
# Returns the blessed object. Croaks when a hash reference is passed instead
# of key/value pairs.
sub new {

    # Check for a common user mistake.
    croak("Options to LWP::Curl should be key/value pairs, not hash reference")
      if ref( $_[1] ) eq 'HASH';

    my ( $class, %args ) = @_;
    my $self = {};

    my $timeout = delete $args{timeout};
    $timeout = 3 * 60 unless defined $timeout;

    my $headers = delete $args{headers};
    $headers = 0 unless defined $headers;

    my $user_agent = delete $args{user_agent};
    $user_agent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'
      unless defined $user_agent;

    # The documentation spells this option "maxredirs" while earlier code
    # only looked at "max_redirs"; honour both, preferring the documented one.
    my $maxredirs        = delete $args{maxredirs};
    my $legacy_maxredirs = delete $args{max_redirs};
    $maxredirs = $legacy_maxredirs unless defined $maxredirs;
    $maxredirs = 3                 unless defined $maxredirs;

    my $followlocation = delete $args{followlocation};
    $followlocation = 1 unless defined $followlocation;

    $self->{auto_encode} = delete $args{auto_encode};
    $self->{auto_encode} = 1 unless defined $self->{auto_encode};

    $self->{timeout} = $timeout;

    # Remember the proxy so the proxy() getter can report it
    # (undef when none was supplied).
    my $proxy = delete $args{proxy};
    $self->{proxy} = $proxy;

    $self->{retcode} = undef;

    # Store the debug flag itself; get() consults $self->{debug}.
    my $debug = delete $args{debug};
    $self->{debug} = defined $debug ? $debug : 0;

    print STDERR "\n Hash Debug: \n" . Dumper($self) . "\n" if $debug;

    $self->{agent} = WWW::Curl::Easy->new();
    $self->{agent}->setopt( CURLOPT_TIMEOUT,        $timeout );
    $self->{agent}->setopt( CURLOPT_USERAGENT,      $user_agent );
    $self->{agent}->setopt( CURLOPT_HEADER,         $headers );
    $self->{agent}->setopt( CURLOPT_AUTOREFERER,    1 );    # always true
    $self->{agent}->setopt( CURLOPT_MAXREDIRS,      $maxredirs );
    $self->{agent}->setopt( CURLOPT_FOLLOWLOCATION, $followlocation );
    $self->{agent}->setopt( CURLOPT_SSL_VERIFYPEER, 0 );
    $self->{agent}->setopt( CURLOPT_VERBOSE,        0 );    # ubuntu bug
    $self->{agent}->setopt( CURLOPT_PROXY,          $proxy ) if $proxy;

    return bless $self, $class;
}
# Performs an HTTP GET of $url and returns the response body as a string.
#
# Parameters:
#   $url     - address to fetch
#   $referer - optional Referer value; an empty string is sent when omitted
#
# The libcurl return code of the transfer is kept in $self->{retcode}.
# Croaks when the response buffer cannot be opened or when the transfer
# fails (the message carries libcurl's error string and code).
sub get {
    my ( $self, $url, $referer ) = @_;

    $referer = "" if !$referer;

    # NOTE(review): URI::Escape appears to wrap its second argument in a
    # character class itself, so "[^:./]" may not behave as the negated class
    # it looks like - confirm before relying on auto_encode.
    $url = uri_escape( $url, "[^:./]" ) if $self->{auto_encode};

    $self->{agent}->setopt( CURLOPT_REFERER, $referer );
    $self->{agent}->setopt( CURLOPT_URL,     $url );
    $self->{agent}->setopt( CURLOPT_HTTPGET, 1 );

    # libcurl writes the response into this in-memory filehandle.
    my $content = "";
    open( my $fileb, ">", \$content )
      or croak("Unable to open in-memory buffer for the response: $!");
    $self->{agent}->setopt( CURLOPT_WRITEDATA, $fileb );

    $self->{retcode} = $self->{agent}->perform;

    if ( $self->{retcode} != 0 ) {
        croak(  "An error happened: Host $url "
              . $self->{agent}->strerror( $self->{retcode} )
              . " ($self->{retcode})\n" );
    }

    print("\nTransfer went ok\n") if $self->{debug};
    return $content;
}
# Performs an HTTP POST of the fields in $hash_form to $url and returns the
# response body as a string.
#
# Parameters:
#   $url       - address to post to
#   $hash_form - hash reference of field name => value; a warning is emitted
#                when it is missing and an empty body is sent
#   $referer   - optional Referer value; an empty string is sent when omitted
#
# The libcurl return code of the transfer is kept in $self->{retcode}.
# Croaks when the response buffer cannot be opened or when the transfer
# fails (the message carries libcurl's error string and code).
sub post {
    my ( $self, $url, $hash_form, $referer ) = @_;

    $referer = "" if !$referer;

    if ( !$hash_form ) {
        warn(qq{POST Data not defined});
        $hash_form = {};
    }

    # Build "name=value" pairs joined by "&"; join avoids the dangling
    # trailing "&" that plain concatenation in a loop leaves behind.
    my $post_string =
      join( "&", map { "$_=$hash_form->{$_}" } keys %{$hash_form} );

    # NOTE(review): URI::Escape appears to wrap its second argument in a
    # character class itself, so "[^:./]" may not behave as the negated class
    # it looks like - confirm before relying on auto_encode.
    $url         = uri_escape( $url,         "[^:./]" ) if $self->{auto_encode};
    $post_string = uri_escape( $post_string, "[^:./]" ) if $self->{auto_encode};

    $self->{agent}->setopt( CURLOPT_POSTFIELDS, $post_string );
    $self->{agent}->setopt( CURLOPT_POST,       1 );
    $self->{agent}->setopt( CURLOPT_HTTPGET,    0 );
    $self->{agent}->setopt( CURLOPT_REFERER,    $referer );
    $self->{agent}->setopt( CURLOPT_URL,        $url );

    # libcurl writes the response into this in-memory filehandle.
    my $content = "";
    open( my $fileb, ">", \$content )
      or croak("Unable to open in-memory buffer for the response: $!");
    $self->{agent}->setopt( CURLOPT_WRITEDATA, $fileb );

    $self->{retcode} = $self->{agent}->perform;

    if ( $self->{retcode} != 0 ) {
        croak(  "An error happened: Host $url "
              . $self->{agent}->strerror( $self->{retcode} )
              . " ($self->{retcode})\n" );
    }

    return $content;
}
# Gets or sets the transfer timeout in seconds.
#
# Without an argument (or with undef) returns the stored timeout. With a
# defined argument stores it, applies it to the curl handle via
# CURLOPT_TIMEOUT and returns the result of that setopt call.
#
# A defined-ness test is used instead of a truth test so that timeout(0)
# is treated as a setter rather than silently acting as a getter.
sub timeout {
    my ( $self, $timeout ) = @_;

    return $self->{timeout} unless defined $timeout;

    $self->{timeout} = $timeout;
    return $self->{agent}->setopt( CURLOPT_TIMEOUT, $timeout );
}
# Gets or sets the auto_encode flag consulted by get() and post().
#
# Without an argument (or with undef) returns the current flag. With a
# defined argument stores it and returns the new value.
#
# A defined-ness test is used instead of a truth test: with a truth test,
# auto_encode(0) acted as a getter and the flag could never be switched off.
sub auto_encode {
    my ( $self, $value ) = @_;

    return $self->{auto_encode} unless defined $value;

    $self->{auto_encode} = $value;
    return $value;
}
# Sets the User-Agent string of the curl handle from a short alias.
#
# $alias must be one of the keys of the table below (table borrowed from
# WWW::Mechanize, thanks to petdance). An unknown alias only triggers a
# warning and leaves the handle untouched.
sub agent_alias {
    my ( $self, $alias ) = @_;

    my %agent_string_for = (
        'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        'Windows Mozilla' =>
'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
        'Mac Safari' =>
'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-us) AppleWebKit/85 (KHTML, like Gecko) Safari/85',
        'Mac Mozilla' =>
'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
        'Linux Mozilla' =>
          'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
        'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
    );

    my $agent_string = $agent_string_for{$alias};

    # Guard clause: nothing to apply when the alias is not in the table.
    unless ( defined $agent_string ) {
        return warn(qq{Unknown agent alias "$alias"});
    }

    return $self->{agent}->setopt( CURLOPT_USERAGENT, $agent_string );
}
# Gets or sets the proxy used by the curl handle.
#
# Without an argument (or with undef) returns the stored proxy. With a
# defined argument stores it, passes it to the curl handle via CURLOPT_PROXY
# and returns the result of that setopt call.
#
# A defined-ness test is used instead of a truth test so that proxy(''),
# the documented way to disable the proxy, actually reaches the curl handle
# instead of acting as a getter.
sub proxy {
    my ( $self, $proxy ) = @_;

    return $self->{proxy} unless defined $proxy;

    $self->{proxy} = $proxy;
    return $self->{agent}->setopt( CURLOPT_PROXY, $proxy );
}
# Gets or sets the cookie jar file used by the curl handle.
#
# Without an argument (or with undef / an empty string) returns the stored
# cookie jar path. With a path, stores it in $self->{cookie_jar} and hands
# it to the curl handle as both CURLOPT_COOKIEFILE and CURLOPT_COOKIEJAR;
# returns the result of the last setopt call.
#
# This used to be a verbatim copy of proxy() and overwrote the proxy
# settings instead of configuring cookies.
sub cookie_jar {
    my ( $self, $cookie_file ) = @_;

    return $self->{cookie_jar}
      if !defined $cookie_file || $cookie_file eq '';

    $self->{cookie_jar} = $cookie_file;

    # NOTE(review): per the libcurl option docs, COOKIEFILE is the file
    # cookies are read from and COOKIEJAR the file they are saved to -
    # confirm that using one path for both is the intended behavior.
    $self->{agent}->setopt( CURLOPT_COOKIEFILE, $cookie_file );
    return $self->{agent}->setopt( CURLOPT_COOKIEJAR, $cookie_file );
}
1; # End of LWP::Curl