| Plagger documentation | Contained in the Plagger distribution. |
Plagger::Plugin::Filter::StripRSSAd - Strip RSS Ads from feed content
- module: Filter::StripRSSAd
This plugin strips context-based RSS ads, such as Google AdSense or rssad.jp, from feed content. It uses quick regular expressions to strip the image and map tags.
Tatsuhiko Miyagawa, Masahiro Nagano
| Plagger documentation | Contained in the Plagger distribution. |
package Plagger::Plugin::Filter::StripRSSAd;
use strict;
use base qw( Plagger::Plugin );

use DirHandle;
use Scalar::Util ();

# Plugin set-up.
#
# Runs the parent class initialisation, asks the Plagger context to autoload
# Filter::BloglinesContentNormalize, then hands every '*.yaml' and '*.pat'
# asset to load_yaml / load_regexp so that $self->{pattern} gets populated.
sub init {
    my $self = shift;
    $self->SUPER::init(@_);

    Plagger->context->autoload_plugin({ module => 'Filter::BloglinesContentNormalize' });

    $self->load_assets('*.yaml', sub { $self->load_yaml(@_) });
    $self->load_assets('*.pat',  sub { $self->load_regexp(@_) });
}

# Read a '.pat' asset and register its content as a compiled regexp rule.
#
#   $file - path of the pattern file to read
#   $base - label stored as the rule's 'site' (used in log messages)
#
# The whole file is taken as one regular expression; a single trailing
# newline is removed. Problems are reported through Plagger->context->error
# or ->log, always prefixed with the file name.
sub load_regexp {
    my($self, $file, $base) = @_;

    Plagger->context->log(debug => "Load regexp $file");

    open my $fh, '<', $file
        or Plagger->context->error("$file: $!");
    my $re = join '', <$fh>;
    close $fh;
    chomp($re);

    # An empty pattern would match the empty string at every position, so
    # update() would report a stripped ad for every single entry.
    unless (length $re) {
        Plagger->context->log(error => "$file: empty pattern, ignored");
        return;
    }

    # Compile inside eval so a malformed pattern is reported together with
    # the file it came from instead of as a bare regexp compilation error.
    my $compiled = eval { qr/$re/ };
    unless (defined $compiled) {
        Plagger->context->error("$file: $@");
        return;    # only reached if error() returns instead of throwing
    }

    push @{$self->{pattern}}, { site => $base, re => $compiled };
}

# Read a '.yaml' asset and register its top-level mapping as a rule.
#
#   $file - path of the YAML file to read
#   $base - default 'site' label; a 'site' key in the YAML overrides it
#
# update() looks at the 're', 'condition' and 'strip' keys of each rule.
# NOTE(review): YAML is not loaded by this file; presumably the Plagger core
# has already loaded it -- confirm.
sub load_yaml {
    my($self, $file, $base) = @_;

    Plagger->context->log(debug => "Load YAML $file");

    # Check $@ rather than the truth of the result: an empty document is
    # not a parse error and would otherwise be reported with an empty $@.
    my $pattern = eval { YAML::LoadFile($file) };
    if ($@) {
        Plagger->context->error("$file: $@");
        return;    # only reached if error() returns instead of throwing
    }

    # The rule is flattened with %$pattern below, so it has to be a hash.
    unless ((Scalar::Util::reftype($pattern) || '') eq 'HASH') {
        Plagger->context->error("$file: top-level YAML document must be a mapping");
        return;    # only reached if error() returns instead of throwing
    }

    push @{$self->{pattern}}, { site => $base, %$pattern };
}

# Hook update() onto the 'update.entry.fixup' phase.
sub register {
    my($self, $context) = @_;
    $context->register_hook(
        $self,
        'update.entry.fixup' => \&update,
    );
}

# 'update.entry.fixup' callback: apply every loaded rule to one entry.
#
#   $args->{entry} - entry whose body is filtered
#   $args->{feed}  - feed the entry is deleted from by a 'condition' rule
#
# Rules with 're' rewrite the body: each match is replaced by its first
# capture group, or removed when there is none. Rules with 'condition' are
# evaluated as Perl code; when the result is true and the rule has a true
# 'strip' value the entry is deleted from the feed. The (possibly rewritten)
# body is stored back on the entry at the end.
sub update {
    my($self, $context, $args) = @_;

    my $body    = $args->{entry}->body;
    my $deleted = 0;

    for my $pattern (@{ $self->{pattern} }) {
        if (my $re = $pattern->{re}) {
            if ($body =~ s!$re!defined($1) ? $1 : ''!egs) {
                Plagger->context->log(info => "Stripped $pattern->{site} Ad on " . $args->{entry}->link);
            }
        }
        elsif (my $cond = $pattern->{condition}) {
            # The entry is already gone: evaluating further conditions could
            # only repeat the deletion and the log line.
            next if $deleted;

            # Expose the current body to the condition code as $args->{body}
            # for the duration of this rule only.
            local $args->{body} = $body;

            # NOTE: string eval of code read from the asset files; it sees
            # $self, $context and $args. Only install trusted rule files.
            my $matched = eval $cond;
            if ($@) {
                Plagger->context->log(error => "Error evaluating $cond: $@");
            }
            elsif ($matched && $pattern->{strip}) {
                $args->{feed}->delete_entry($args->{entry});
                Plagger->context->log(info => "Stripped Ad entry " . $args->{entry}->link);
                $deleted = 1;
            }
        }
    }

    $args->{entry}->body($body);
}

1;

__END__