/usr/local/CPAN/LUGS-Events-Parser/LUGS/Events/Parser/Filter.pm
package LUGS::Events::Parser::Filter;
use strict;
use warnings;
use boolean qw(true);
use HTML::Entities qw(decode_entities);
use HTML::Parser ();
our $VERSION = '0.02';
my (@tags, @stack);
sub _init_parser
{
my $self = shift;
my $parser = HTML::Parser->new(
api_version => 3,
start_h => [ \&_start_tag, 'tagname,attr,attrseq' ],
text_h => [ \&_text_tag, 'text' ],
end_h => [ \&_end_tag, 'tagname' ],
);
return $parser;
}
sub _parse_html
{
my $self = shift;
my ($chunk, $html) = @_;
$self->{parser}->parse($chunk);
undef @stack;
return unless @tags;
@$html = @tags;
undef @tags;
}
sub _eof_parser
{
my $self = shift;
$self->{parser}->eof;
}
sub _start_tag
{
my ($tagname, $attr, $attrseq) = @_;
push @stack, { name => $tagname, attr => $attr, attrseq => $attrseq };
}
sub _text_tag
{
my ($text) = @_;
return unless @stack;
$stack[-1]->{text} = $text;
}
sub _end_tag
{
my ($tagname) = @_;
return unless @stack;
if ($stack[-1]->{name} eq $tagname) {
push @tags, {
$tagname => {
map { $_ => $stack[-1]->{$_} }
qw(text attr attrseq),
},
};
pop @stack;
}
}
sub _rewrite_tags
{
my $self = shift;
my ($fields) = @_;
foreach my $field (keys %{$fields->{_html}}) {
my %rewritten;
foreach my $html (@{$fields->{_html}->{$field}}) {
foreach my $tag (keys %$html) {
my @tagnames;
if (%{$html->{$tag}->{attr}}) {
foreach my $attr (keys %{$html->{$tag}->{attr}}) {
if (exists $self->{Tag_handlers}->{"$tag $attr"}) {
push @tagnames, "$tag $attr";
}
}
}
else {
if (exists $self->{Tag_handlers}->{$tag}) {
push @tagnames, $tag;
}
}
foreach my $tagname (@tagnames) {
foreach my $handler (@{$self->{Tag_handlers}->{$tagname}}) {
if ($self->_field_rewrite($field, $handler)) {
unless (exists $rewritten{$tagname}) {
$rewritten{$tagname} = true;
}
my $subst = $handler->{rewrite};
foreach my $subst_item ($self->_subst_data($html, $tag)) {
next unless defined $subst_item->[1];
my ($identifier, $replacement) = @$subst_item;
my $place_holder = uc $identifier;
$subst =~ s/\$$place_holder/$replacement/;
}
my $re = $self->_subst_pattern($html, $tag);
my ($text) = $fields->{$field} =~ $re;
my $replace = defined $html->{$tag}->{text} ? $subst : $text;
$fields->{$field} =~ s{$re}{$replace};
}
}
}
}
}
foreach my $tagname (grep !$rewritten{$_}, keys %{$self->{Tag_handlers}}) {
foreach my $handler (@{$self->{Tag_handlers}->{$tagname}}) {
if ($self->_field_rewrite($field, $handler)) {
if ($tagname !~ /\b\s+?\b/
&& $fields->{$field} =~ m{<$tagname>}
&& $fields->{$field} !~ m{</$tagname>}
) {
my $subst = $handler->{rewrite};
$fields->{$field} =~ s{<$tagname>}{$subst}g;
}
}
}
}
}
}
sub _strip_text
{
my $self = shift;
my ($fields) = @_;
foreach my $field (grep !/^\_/, keys %$fields) {
foreach my $item (@{$self->{Strip_text}}) {
$fields->{$field} =~ s/\Q$item\E//gi;
}
}
}
sub _decode_entities
{
my $self = shift;
my ($fields) = @_;
foreach my $field (grep !/^\_/, keys %$fields) {
decode_entities($fields->{$field});
}
}
sub _field_rewrite
{
my $self = shift;
my ($field, $handler) = @_;
my %rewrite = map { $_ => true } @{$handler->{fields}};
return ($rewrite{$field} || $rewrite{'*'});
}
sub _subst_data
{
my $self = shift;
my ($html, $tag) = @_;
return (map {
[ $_ => $html->{$tag}->{attr}->{$_} ]
} keys %{$html->{$tag}->{attr}}),
(map {
[ $_ => $html->{$tag}->{$_} ]
} grep /^(?:text)$/, keys %{$html->{$tag}});
}
sub _subst_pattern
{
my $self = shift;
my ($html, $tag) = @_;
if (@{$html->{$tag}->{attrseq}}) {
my $attr = join ' ',
map "${_}=\"$html->{$tag}->{attr}->{$_}\"",
@{$html->{$tag}->{attrseq}};
my $text = $html->{$tag}->{text};
return defined $text
? qr{<$tag\s+?\Q$attr\E>$text</$tag>}
: qr{<$tag\s+?\Q$attr\E>(.*?)</$tag>};
}
else {
return qr{<$tag>(.*?)</$tag>};
}
}
1;