Plagger::Plugin::Filter::StripRSSAd - Strip RSS Ads from feed content


Plagger documentation Contained in the Plagger distribution.

Index


Code Index:

NAME

Top

Plagger::Plugin::Filter::StripRSSAd - Strip RSS Ads from feed content

SYNOPSIS

Top

  - module: Filter::StripRSSAd

DESCRIPTION

Top

This plugin strips context-based RSS ads, such as Google AdSense or rssad.jp, from feed content. It uses quick regular expressions to strip the ad images and map tags.

AUTHOR

Top

Tatsuhiko Miyagawa, Masahiro Nagano

SEE ALSO

Top

Plagger


Plagger documentation Contained in the Plagger distribution.

package Plagger::Plugin::Filter::StripRSSAd;
use strict;
use base qw( Plagger::Plugin );

use DirHandle;

# Plugin initialisation.
#
# Runs the parent class initialiser, asks the Plagger context to autoload
# Filter::BloglinesContentNormalize, and then hands two asset globs to
# load_assets together with the callback that should process each of them.
# The YAML assets are requested before the *.pat assets, which fixes the
# order in which entries end up in $self->{pattern}.
sub init {
    my $self = shift;
    $self->SUPER::init(@_);

    my $context = Plagger->context;
    $context->autoload_plugin({ module => 'Filter::BloglinesContentNormalize' });

    # glob => name of the method that consumes a matching asset
    my @loaders = (
        [ '*.yaml' => 'load_yaml'   ],
        [ '*.pat'  => 'load_regexp' ],
    );

    for my $loader (@loaders) {
        my ($glob, $method) = @$loader;
        $self->load_assets($glob, sub { $self->$method(@_) });
    }
}

# Read one *.pat asset and register its contents as a compiled regexp.
#
#   $file - path of the pattern file to read
#   $base - label stored as the entry's "site" (used in log messages)
#
# The whole file is used as a single pattern; only one trailing newline is
# removed. The resulting { site, re } entry is appended to $self->{pattern}.
sub load_regexp {
    my($self, $file, $base) = @_;
    Plagger->context->log(debug => "Load regexp $file");

    open(my $in, '<', $file) or Plagger->context->error("$file: $!");

    # Slurp the complete file in one read.
    my $source = do { local $/; <$in> };
    chomp $source;

    my %entry = (
        site => $base,
        re   => qr/$source/,
    );
    push @{ $self->{pattern} }, \%entry;
}

# Read one *.yaml asset and register it as a pattern entry.
#
#   $file - path of the YAML file to read
#   $base - label stored as the entry's "site" (used in log messages)
#
# The keys of the loaded document are merged into the entry after "site",
# so a "site" key in the YAML file overrides $base. The entry is appended
# to $self->{pattern}.
sub load_yaml {
    my($self, $file, $base) = @_;
    Plagger->context->log(debug => "Load YAML $file");

    # This file never loads YAML itself, so require it here instead of
    # relying on another module having done so. Doing it inside the eval
    # routes a missing module through the same error report as a parse
    # failure.
    my $pattern = eval { require YAML; YAML::LoadFile($file) }
        or Plagger->context->error(
            "$file: " . ($@ || 'empty or false YAML document')
        );

    push @{$self->{pattern}}, { site => $base, %$pattern };
}

# Hook registration: subscribes this plugin's update() to the
# 'update.entry.fixup' hook on the given context.
sub register {
    my($self, $context) = @_;

    my %hooks = (
        'update.entry.fixup' => \&update,
    );

    $context->register_hook($self, %hooks);
}

# Handler for the 'update.entry.fixup' hook.
#
# Runs every entry of $self->{pattern} against the body of $args->{entry}
# and finally writes the (possibly modified) body back to the entry.
#
# Two kinds of pattern entries are handled:
#   * entries with a true "re":        regexp substitution on the body
#   * entries with a true "condition": Perl source text that is evaluated;
#     if it is true and the entry's "strip" is true, the entry is deleted
#     from $args->{feed}
#
# NOTE(review): the condition text is run with string eval, which executes
# in this sub's lexical scope. Conditions may therefore refer to the
# lexicals declared here ($self, $context, $args, $body, $pattern) by name;
# renaming them could break asset files -- confirm against the shipped
# assets before refactoring.
sub update {
    my($self, $context, $args) = @_;
    my $body = $args->{entry}->body;

    for my $pattern (@{ $self->{pattern} }) {
        if (my $re = $pattern->{re}) {
            # Replace every match with its first capture group when that
            # group is defined, otherwise with the empty string. Log only
            # when at least one substitution was made.
            if (my $count = $body =~ s!$re!defined($1) ? $1 : ''!egs) {
                Plagger->context->log(info => "Stripped $pattern->{site} Ad on " . $args->{entry}->link);
            }
        } elsif (my $cond = $pattern->{condition}) {
            # Expose the current body as $args->{body} while the condition
            # runs; local restores the previous value when this block ends.
            local $args->{body} = $body;
            # SECURITY: $cond is executed as Perl code. Within this file it
            # can only originate from a YAML asset (load_yaml), so those
            # assets must be trusted.
            if (eval $cond && $pattern->{strip}) {
                # The loop is not left here: remaining patterns still run
                # and the body is still written back below.
                $args->{feed}->delete_entry($args->{entry});
                Plagger->context->log(info => "Stripped Ad entry " . $args->{entry}->link);
            } elsif ($@) {
                # The condition itself died while being evaluated.
                Plagger->context->log(error => "Error evaluating $cond: $@");
            }
        }
    }

    $args->{entry}->body($body);
}

1;

__END__