| HTML-TokeParser-Simple documentation | Contained in the HTML-TokeParser-Simple distribution. |
HTML::TokeParser::Simple::Token::Tag::End - Token.pm "end tag" class.
use HTML::TokeParser::Simple;
my $p = HTML::TokeParser::Simple->new( $somefile );
while ( my $token = $p->get_token ) {
# This prints all text in an HTML doc (i.e., it strips the HTML)
next unless $token->is_text;
print $token->as_is;
}
This class does most of the heavy lifting for HTML::TokeParser::Simple. See
the HTML::TokeParser::Simple docs for details.
| HTML-TokeParser-Simple documentation | Contained in the HTML-TokeParser-Simple distribution. |
package HTML::TokeParser::Simple::Token::Tag::End;

# End-tag token class.  Builds on HTML::TokeParser::Simple::Token::Tag and
# adds per-instance bookkeeping (tag name and offset) plus the is_tag /
# is_end_tag predicates and the text accessors defined below.

use strict;

use vars qw/ $VERSION $REVISION /;
$REVISION = '$Id: End.pm,v 1.3 2005/10/08 19:45:55 ovid Exp $';
$VERSION  = '1.0';
use base 'HTML::TokeParser::Simple::Token::Tag';

# NOTE(review): %TOKEN is not referenced by any code visible in this file;
# it is kept untouched in case something outside this view relies on it.
my %TOKEN = (
    tag  => 1,
    text => 2
);

# in order to maintain the 'drop-in replacement' ability with HTML::TokeParser,
# we cannot alter the array refs.  Thus we must store instance data here.  Ugh.
#
# Keyed by the stringified object reference; entries are removed in DESTROY.
my %INSTANCE;

# _init()
#
# Records the tag name and offset for this token in %INSTANCE and returns
# the token itself.
#
# Two array layouts are recognised:
#   * first element is the literal 'E': the tag name is read from index 1
#     and the offset is 0;
#   * anything else: the first element is taken as the tag name with one
#     leading '/' stripped, and the offset is -1.
# (Presumably these correspond to the get_token and get_tag return shapes of
# HTML::TokeParser -- confirm against that module's documentation.)
sub _init {
    my $self = shift;

    if ('E' eq $self->[0]) {
        $INSTANCE{$self}{offset} = 0;
        $INSTANCE{$self}{tag}    = $self->[1];
    }
    else {
        $INSTANCE{$self}{offset} = -1;

        # Work on a copy so the array ref itself is never modified.
        my $tag = $self->[0];
        $tag =~ s/^\///;
        $INSTANCE{$self}{tag} = $tag;
    }

    return $self;
}

# Returns the offset stored by _init (0 or -1).
sub _get_offset {
    my $self = shift;
    return $INSTANCE{$self}{offset};
}

# Returns the last element of the token array, i.e. the tag's text.
sub _get_text {
    my $self = shift;
    return $self->[-1];
}

# Returns the tag name stored by _init (no leading slash).
sub _get_tag {
    my $self = shift;
    return $INSTANCE{$self}{tag};
}

# Drops this object's entry from %INSTANCE so the side table does not grow
# as tokens are destroyed.
sub DESTROY {
    my $self = shift;
    delete $INSTANCE{$self};
}

# rewrite_tag()
#
# Rebuilds the token text from its parts -- '<', an optional '/', the tag
# name, an optional self-closing slash, '>' -- and stores it through
# _set_text (not defined in this file; presumably inherited).  Returns the
# token so calls can be chained.
sub rewrite_tag {
    my $self = shift;

    # capture the final slash if the tag is self-closing
    my ($self_closing) = $self->_get_text =~ m{(\s?/)>$};
    $self_closing ||= '';

    # Without an argument is_end_tag() in this class returns 1, so the
    # leading slash is always emitted here.
    my $first = $self->is_end_tag ? '/' : '';
    my $tag   = sprintf '<%s%s%s>', $first, $self->get_tag, $self_closing;

    $self->_set_text($tag);
    return $self;
}

# Deprecated alias for as_is(): emits a warning via Carp::carp and then
# transfers control to as_is with the same argument list.
sub return_text {
    require Carp;
    Carp::carp('return_text() is deprecated. Use as_is() instead');
    goto &as_is;
}

# Returns the token's text exactly as stored.
sub as_is {
    my $self = shift;
    return $self->_get_text;
}

# Returns the tag name (no leading slash).
sub get_tag {
    my $self = shift;
    return $self->_get_tag;
}

# is_foo methods

# Same as is_end_tag(); any arguments are passed straight through.
sub is_tag {
    my $self = shift;
    return $self->is_end_tag( @_ );
}

# is_end_tag( [$tag] )
#
# With a true $tag (string or qr// object) returns whether this token's tag
# matches it; with no argument, or a false one, returns 1.
sub is_end_tag {
    my ($self, $tag) = @_;
    return $tag ? $self->_match_tag($tag) : 1;
}

# _match_tag( $tag )
#
# Compares the stored tag name against $tag and returns a true/false scalar.
#   * A Regexp object is matched against the tag name.
#   * Anything else is lowercased, has one leading '/' removed, and is
#     compared for string equality.
sub _match_tag {
    my ($self, $tag) = @_;

    if ('Regexp' eq ref $tag) {
        # Force scalar context: a bare match in list context yields the
        # capture groups (possibly false values) on success and an empty
        # list on failure, which is not a reliable boolean and disagrees
        # with the string branch below.
        return scalar( $self->_get_tag =~ $tag );
    }

    $tag = lc $tag;
    $tag =~ s/^\///;
    return $self->_get_tag eq $tag;
}

1;

__END__