Plagger::Plugin::Filter::HTMLScrubber - Scrub feed content


Plagger documentation Contained in the Plagger distribution.

Index


Code Index:

NAME

Top

Plagger::Plugin::Filter::HTMLScrubber - Scrub feed content

SYNOPSIS

Top

  - module: Filter::HTMLScrubber
    config:
      rules:
        style: 0
        script: 0

DESCRIPTION

Top

This plugin scrubs feed content using HTML::Scrubber.

Every config parameter (except 'no_default_configs') is turned into a call to the HTML::Scrubber method of the same name, with the configured value as its argument. For example, if you write:

    method: value

in the config: section, this plugin will automatically turn the config into the method call:

    $scrubber->method('value');

See the HTML::Scrubber documentation for details.

CONFIG

Top

no_default_configs

Some rules and default attribute settings are applied by default. See the rules and default methods defined in this module's code for details.

If you don't need these settings, use no_default_configs

   no_default_configs: 1

Defaults to 0, which means it uses the default (somewhat secure) config.

AUTHOR

Top

Daisuke Murase <typester@cpan.org>

Tatsuhiko Miyagawa

SEE ALSO

Top

Plagger, HTML::Scrubber


Plagger documentation Contained in the Plagger distribution.

package Plagger::Plugin::Filter::HTMLScrubber;
use strict;
use base qw( Plagger::Plugin );

use HTML::Scrubber;

# Built-in per-tag rules. initialize() passes them to HTML::Scrubber->rules,
# merged with any user-supplied 'rules' config, unless 'no_default_configs'
# is set.
#
# Takes no arguments (the invocant is ignored).
# Returns a flat list of tag => rule pairs suitable for assigning to a hash.
sub rules {
    return(
        img => {
            # Only absolute http:// or https:// URLs. The scheme is matched
            # case-insensitively, so "HTTPS://..." is accepted as well.
            src => qr{\Ahttps?://}i,
            alt => 1,               # alt attributes allowed
            '*' => 0,               # deny all others
        },
        style  => 0,
        script => 0,
    );
}

# Built-in attribute rules. initialize() merges them with any user-supplied
# 'default' config and passes the result to HTML::Scrubber->default, unless
# 'no_default_configs' is set.
#
# Takes no arguments (the invocant is ignored).
# Returns a flat list of attribute => rule pairs. Each key appears exactly
# once, so the list can be assigned to a hash without one entry silently
# overriding another.
sub default {
    return(
        '*'    => 1,                        # default rule, allow all attributes
        'href' => qr{^(?!(?:java)?script)}i,
        'cite'     => '(?i-xsm:^(?!(?:java)?script))',
        'language' => 0,
        'name'        => 1,                 # could be sneaky, but hey ;)
        'onblur'      => 0,
        'onchange'    => 0,
        'onclick'     => 0,
        'ondblclick'  => 0,
        'onerror'     => 0,
        'onfocus'     => 0,
        'onkeydown'   => 0,
        'onkeypress'  => 0,
        'onkeyup'     => 0,
        'onload'      => 0,
        'onmousedown' => 0,
        'onmousemove' => 0,
        'onmouseout'  => 0,
        'onmouseover' => 0,
        'onmouseup'   => 0,
        'onreset'     => 0,
        'onselect'    => 0,
        'onsubmit'    => 0,
        'onunload'    => 0,
        # 'src' is denied by default. An earlier duplicate 'src' => qr{...}
        # entry in this list was always overridden by this one once the list
        # was stored in a hash, so it has been removed. The img rule in
        # rules() carries its own 'src' setting.
        'src'         => 0,
        'type'        => 0,
        'style'       => 0,
    );
}

# Hook this plugin into the given context: entries are scrubbed on
# 'update.entry.fixup', and the scrubber itself is built on 'plugin.init'.
#
# Returns whatever $context->register_hook returns.
sub register {
    my ( $self, $context ) = @_;

    # Ordered list of hook name => callback pairs.
    my @hooks = (
        'update.entry.fixup' => \&update,
        'plugin.init'        => \&initialize,
    );

    return $context->register_hook( $self, @hooks );
}

# Build the HTML::Scrubber instance used by this plugin and store it in
# $self->{scrubber}.
#
# The plugin config is interpreted as follows:
#   no_default_configs - when true, the built-in rules() and default() lists
#                        are not applied
#   rules              - hash merged over the built-in rules and passed to
#                        $scrubber->rules
#   default            - hash merged over the built-in attribute defaults and
#                        passed to $scrubber->default(1, {...})
#   anything else      - called as $scrubber->$key(...), with array and hash
#                        values flattened into an argument list
#
# A failing pass-through call is reported via $context->error.
# Returns the scrubber.
sub initialize {
    my($self, $context, $args) = @_;

    # Work on a shallow copy: the keys consumed below must not disappear from
    # the plugin's own configuration, otherwise a second initialization would
    # silently see a different config.
    my %config = %{ $self->conf || {} };

    my ( %rules, %default );
    unless ( delete $config{no_default_configs} ) {
        %rules   = $self->rules;
        %default = $self->default;
    }

    my $scrubber = HTML::Scrubber->new;

    # User-supplied entries come last so they override the built-in ones.
    $scrubber->rules( %rules, %{ delete $config{rules} || {} } );
    $scrubber->default(1, { %default, %{ delete $config{default} || {} } });

    # Every remaining key is forwarded as a method call. Sorting keeps the
    # call order (and therefore any error output) reproducible.
    for my $method ( sort keys %config ) {
        my $arg = $config{$method};
        eval {
            $scrubber->$method(
                  ref $arg eq 'ARRAY' ? @$arg
                : ref $arg eq 'HASH'  ? %$arg
                : $arg );
            1;
        } or $context->error(qq/Invalid method call "$method": $@/);
    }

    return $self->{scrubber} = $scrubber;
}

# Hook callback: scrub the body of $args->{entry} with the scrubber stored in
# $self->{scrubber}, replacing the entry body with the scrubbed result.
#
# Nothing happens when the entry has no body or the body is not HTML.
sub update {
    my ( $self, $context, $args ) = @_;

    my $entry = $args->{entry};
    my $body  = $entry->body;

    if (defined $body && $body->is_html) {
        # The parentheses matter: "." binds tighter than "||", so without them
        # the '(no-link)' fallback could never be selected.
        $context->log(debug => "Scrubbing body for " . ( $entry->permalink || '(no-link)' ));
        $entry->body( $self->{scrubber}->scrub($body) );
    }
}

1;

__END__