| Plagger documentation | Contained in the Plagger distribution. |
Plagger::Plugin::Filter::HTMLScrubber - Scrub feed content
- module: Filter::HTMLScrubber
config:
rules:
style: 0
script: 0
This plugin scrubs feed content using HTML::Scrubber.
All config parameters (except 'no_default_configs') are implemented as HTML::Scrubber's method: value. For example, if you write:
method: value
in the config: section, this plugin will automatically turn the config into the method call:
$scrubber->method('value');
See HTML::Scrubber document for details.
Some rules and default config parameters are set by default. See rules and default methods defined in this module code for details.
If you don't need these settings, use no_default_configs
no_detault_configs: 1
Defaults to 0, which means it uses the default (somewhat secure) config.
Daisuke Murase <typester@cpan.org>
Tatsuhiko Miyagawa
| Plagger documentation | Contained in the Plagger distribution. |
package Plagger::Plugin::Filter::HTMLScrubber; use strict; use base qw( Plagger::Plugin ); use HTML::Scrubber; sub rules { return( img => { src => qr{^http://}, # only URL with http:// alt => 1, # alt attributes allowed '*' => 0, # deny all others }, style => 0, script => 0, ); } sub default { return( '*' => 1, # default rule, allow all attributes 'href' => qr{^(?!(?:java)?script)}i, 'src' => qr{^(?!(?:java)?script)}i, 'cite' => '(?i-xsm:^(?!(?:java)?script))', 'language' => 0, 'name' => 1, # could be sneaky, but hey ;) 'onblur' => 0, 'onchange' => 0, 'onclick' => 0, 'ondblclick' => 0, 'onerror' => 0, 'onfocus' => 0, 'onkeydown' => 0, 'onkeypress' => 0, 'onkeyup' => 0, 'onload' => 0, 'onmousedown' => 0, 'onmousemove' => 0, 'onmouseout' => 0, 'onmouseover' => 0, 'onmouseup' => 0, 'onreset' => 0, 'onselect' => 0, 'onsubmit' => 0, 'onunload' => 0, 'src' => 0, 'type' => 0, 'style' => 0, ); } sub register { my ( $self, $context ) = @_; $context->register_hook( $self, 'update.entry.fixup' => \&update, 'plugin.init' => \&initialize, ); } sub initialize { my($self, $context, $args) = @_; $self->{scrubber} = do { my $scrubber = HTML::Scrubber->new; my $config = $self->conf; my ( %rules, %default ); unless ( delete $config->{no_default_configs} ) { %rules = $self->rules; %default = $self->default; } $scrubber->rules( %rules, %{ delete $config->{rules} || {} } ); $scrubber->default(1, { %default, %{ delete $config->{default} || {} } }); while ( my ( $method, $arg ) = each %$config ) { eval { $scrubber->$method( ref $arg eq 'ARRAY' ? @$arg : ref $arg eq 'HASH' ? %$arg : $arg ); }; $context->error(qq/Invalid method call "$method": $@/) if $@; } $scrubber; }; } sub update { my ( $self, $context, $args ) = @_; if (defined $args->{entry}->body && $args->{entry}->body->is_html) { $context->log(debug => "Scrubbing body for " . $args->{entry}->permalink || '(no-link)'); my $body = $self->{scrubber}->scrub( $args->{entry}->body ); $args->{entry}->body($body); } } 1; __END__