| Plagger documentation | Contained in the Plagger distribution. |
Plagger::Plugin::Filter::FetchEnclosure::Wget - Fetch enclosures using wget
  - module: Filter::FetchEnclosure::Wget
    config:
      dir: /path/to/download
      concurrency: 5
      max_requests_per_host: 2
This plugin uses the wget command to download enclosure files.
Tatsuhiko Miyagawa
| Plagger documentation | Contained in the Plagger distribution. |
package Plagger::Plugin::Filter::FetchEnclosure::Wget;
use strict;
use warnings;
use base qw(Plagger::Plugin::Filter::FetchEnclosure);

use File::Spec;
use POE;
use POE::Session;
use POE::Wheel::Run;

# Registers the two hooks this plugin runs on:
#   update.entry.fixup - enqueue(): creates one POE session per enclosure
#   update.fixup       - fetch():   runs the POE kernel so the sessions execute
sub register {
    my($self, $context) = @_;
    $context->register_hook(
        $self,
        'update.entry.fixup' => \&enqueue,
        'update.fixup'       => \&fetch,
    );
}

# Queues a wget download for every enclosure of $args->{entry}.
#
# Files are stored as <conf dir>/<feed id_safe>/<enclosure filename>.  An
# enclosure is skipped when it advertises a length and a file of exactly that
# size already exists at the target path.  Nothing is downloaded here: each
# enclosure only gets a POE session whose wheel starts once the kernel runs
# (see fetch()).
#
# Parameters:
#   $self    - plugin instance; reads conf->{dir} and conf->{fake_referer}
#   $context - Plagger context, used for logging and the user_agent config
#   $args    - hook arguments; reads $args->{entry} and $args->{feed}
sub enqueue {
    my($self, $context, $args) = @_;

    for my $enclosure ($args->{entry}->enclosures) {
        # TODO: do all of this in the base class ::Command
        my $feed_dir = File::Spec->catdir($self->conf->{dir}, $args->{feed}->id_safe);
        unless (-d $feed_dir) {
            $context->log(info => "mkdir $feed_dir");
            unless (mkdir($feed_dir, 0777)) {
                # Save $! first: the -d test below may overwrite it.
                my $error = $!;
                # Only give up if the directory is still missing; another
                # process may have created it in the meantime.
                unless (-d $feed_dir) {
                    $context->log(error => "mkdir $feed_dir failed: $error");
                    # Every enclosure of this entry shares the same directory,
                    # so none of the remaining ones can be stored either.
                    return;
                }
            }
        }

        my $path = File::Spec->catfile($feed_dir, $enclosure->filename);

        if ($enclosure->length && -e $path && -s _ == $enclosure->length) {
            # TODO: if-none-match
            $context->log(debug => $enclosure->url . " is already stored in $path");
            next;
        }

        $context->log(info => "fetch " . $enclosure->url . " to " . $path);

        my $referer;
        if ($self->conf->{fake_referer}) {
            $referer = $args->{entry}->permalink;
            # Log only when a Referer is really sent; an empty permalink
            # results in no --referer option at all.
            $context->log(debug => "Sending Referer: $referer") if $referer;
        }

        my $cookies = _cookie_file($context);
        my @command = _wget_command($enclosure->url, $path, $referer, $cookies);

        # TODO: max connections per domain to respect RFC
        POE::Session->create(
            inline_states => {
                _start => sub {
                    $_[HEAP]->{wheel} = POE::Wheel::Run->new(
                        Program     => \@command,
                        StderrEvent => 'stderr',
                        ErrorEvent  => 'wheel_close',
                        CloseEvent  => 'wheel_close',
                    );
                },
                stderr => sub {
                    _handle_wget_line($context, $enclosure, $path, $_[ARG0]);
                },
                wheel_close => sub {
                    # Dropping the wheel lets the session (and eventually the
                    # kernel loop) finish.
                    delete $_[HEAP]->{wheel};
                },
            },
        );
    }
}

# Returns the path of the cookie file to hand to wget, or nothing when no
# usable cookie file is configured.  Only jars that are
# HTTP::Cookies::Mozilla objects are used; their {file} entry is returned.
# NOTE(review): Plagger::Cookies is not loaded by this file; presumably the
# Plagger core has already loaded it - confirm.
sub _cookie_file {
    my($context) = @_;

    my $conf = $context->conf->{user_agent} || {};
    return unless $conf->{cookies};

    my $cookie_jar = Plagger::Cookies->create($conf->{cookies});
    return unless $cookie_jar->isa('HTTP::Cookies::Mozilla');

    my $file = $cookie_jar->{file};
    $context->log(debug => "Using cookie file $file");
    return $file;
}

# Builds the argument list (program name first) used to download $url to
# $path.  $referer and $cookies are optional and are only passed on when true.
sub _wget_command {
    my($url, $path, $referer, $cookies) = @_;

    return (
        'wget',
        '-O', $path,
        '--verbose',
        '--continue',
        '--timestamping',
        '--tries', 5,
        ($referer ? ('--referer', $referer) : ()),
        ($cookies ? ('--load-cookies', $cookies) : ()),
        # The URL comes from a remote feed: end option parsing first so that a
        # value starting with "-" can never be taken as a wget option.
        '--', $url,
    );
}

# Handles one line of wget's stderr output for the download of $path:
#   * a "Length: ..." line updates the enclosure's length and MIME type
#   * a "... saved [...]" line for $path records the enclosure's local path
# Every line is also written to the debug log.
sub _handle_wget_line {
    my($context, $enclosure, $path, $line) = @_;

    if ($line =~ /The file is already fully retrieved/) {
        # ok
    } elsif ($line =~ /^Length: ([\d,]+)(?: \([^)]*\))? \[(.*?)\]/) {
        # Matches both "Length: 1,234 [type]" and the form with a
        # human-readable size, "Length: 1234 (1.2K) [type]".
        my($length, $mime_type) = ($1, $2);
        $length =~ tr/,//d;
        $enclosure->length($length);
        $enclosure->type($mime_type);
    } elsif ($line =~ m!(?:[`'"]|\xE2\x80\x98)\Q$path\E(?:['"]|\xE2\x80\x99) saved \[(\d+)(?:/\d+)?\]!) {
        # Accepts the classic `file' quoting as well as plain and UTF-8
        # curly quotes (assumes the line arrives as undecoded bytes), and a
        # missing "/total" part.
        my $length = $1;
        $enclosure->local_path($path);
        $context->log(info => "Download to $path is done [$length]");
    }

    $context->log(debug => $line);
}

# Runs the POE kernel, which executes every session queued by enqueue() and
# returns once all of them have finished.
sub fetch {
    Plagger->context->log(info => "Start downloading files using wget.");
    POE::Kernel->run;
    Plagger->context->log(info => "w00t! Downloading finished.");
}

1;

__END__