| Plagger documentation | Contained in the Plagger distribution. |
Plagger::Plugin::CustomFeed::GoogleNews - Create Google News custom feed
- module: Subscription::Config
  config:
    feed:
      - http://news.google.com/news?ned=jp&rec=0&topic=s
      - http://news.google.co.jp/news?hl=ja&ned=jp&q=%E5%9B%B2%E7%A2%81
- module: CustomFeed::GoogleNews
This plugin creates a custom feed from Google News HTML pages. Use it together with the EntryFullText plugin to get the full content and an accurate datetime for each article.
Tatsuhiko Miyagawa
| Plagger documentation | Contained in the Plagger distribution. |
package Plagger::Plugin::CustomFeed::GoogleNews;
use strict;
use warnings;
use base qw( Plagger::Plugin );

use Plagger::UserAgent;
use Plagger::Util;
use URI;
use URI::QueryParam;

# Register handle() as this plugin's callback for the 'customfeed.handle'
# hook.
sub register {
    my($self, $context) = @_;

    $context->register_hook(
        $self,
        'customfeed.handle' => \&handle,
    );
}

# Hook callback: decide whether the subscribed feed URL is a Google News
# HTML page this plugin should scrape.
#
# Returns 1 after aggregating when the URL is on news.google.com or
# news.google.co.jp and does not already request RSS/Atom output;
# otherwise returns nothing.
sub handle {
    my($self, $context, $args) = @_;

    my $feed_url = $args->{feed}->url;

    if ($feed_url =~ m!^http://news\.google\.(?:co\.jp|com)/!
        && $feed_url !~ /output=(?:rss|atom)/) {
        $self->aggregate($context, $args);
        return 1;
    }

    return;
}

# Fetch the Google News page for the subscribed feed, scrape its article
# links into a new Plagger::Feed and add that feed to the context's update.
#
# Returns nothing (after logging an error) when the fetch fails; otherwise
# returns whatever $context->update->add() returns.
sub aggregate {
    my($self, $context, $args) = @_;

    my $url = URI->new($args->{feed}->url);

    # ned=jp -> ned=tjp
    # The edition code defaults to 'us' and gets a leading 't' unless it
    # already has one.
    # NOTE(review): the 't' prefix presumably selects a text-only edition
    # that the scraping regex below expects -- confirm.
    my $ned = $url->query_param('ned') || 'us';
    $ned = "t$ned" unless $ned =~ /^t/;
    $url->query_param(ned => $ned);

    $context->log(info => "GET $url");

    # The plugin itself is passed as the second argument; how fetch() uses
    # it is not visible from this file.
    my $agent = Plagger::UserAgent->new;
    my $res   = $agent->fetch($url, $self);

    if ($res->is_error) {
        $context->log(error => "GET $url failed: " . $res->status);
        return;
    }

    my $content    = Plagger::Util::decode_content($res);
    my $feed_title = Plagger::Util::extract_title($content);

    my $feed = Plagger::Feed->new;
    $feed->title($feed_title);
    # The feed links to the URL as subscribed, not the rewritten ned=t... one.
    $feed->link($args->{feed}->url);

    # Each article is an absolute http:// anchor carrying an unquoted
    # id=r-<digit>... attribute; the anchor body is the headline.
    while ($content =~ m!<a href="(http://[^"]*)" id=r-\d[^>]*>(.*?)</a>!g) {
        my($link, $entry_title) = ($1, $2);

        # Drop <b>...</b> markup inside the headline, keeping its text.
        $entry_title =~ s!<b>(.*?)</b>!$1!g;

        my $entry = Plagger::Entry->new;
        $entry->title($entry_title);
        $entry->link($link);
        $feed->add_entry($entry);
    }

    return $context->update->add($feed);
}

1;
__END__