| File-Comments documentation | Contained in the File-Comments distribution. |
File::Comments::Plugin::HTML - Plugin to detect comments in HTML source code
use File::Comments::Plugin::HTML;
File::Comments::Plugin::HTML is a plugin for the File::Comments framework.
It uses HTML::TokeParser to extract comments from HTML files.
Copyright 2005 by Mike Schilli, all rights reserved. This program is free software, you can redistribute it and/or modify it under the same terms as Perl itself.
2005, Mike Schilli <cpan@perlmeister.com>
| File-Comments documentation | Contained in the File-Comments distribution. |
###########################################
# File::Comments::Plugin::HTML
# 2005, Mike Schilli <cpan@perlmeister.com>
#
# File::Comments plugin that extracts comments from, and strips
# comments out of, HTML content. It registers itself for the
# suffixes .htm, .html, .HTML and .HTM.
###########################################

###########################################
package File::Comments::Plugin::HTML;
###########################################

use strict;
use warnings;
use File::Comments::Plugin;
use Log::Log4perl qw(:easy);
use HTML::TokeParser;

our $VERSION = "0.01";
our @ISA     = qw(File::Comments::Plugin);

###########################################
sub init {
###########################################
    # Registers every file suffix this plugin is responsible for.
    my($self) = @_;

    $self->register_suffix(".htm");
    $self->register_suffix(".html");
    $self->register_suffix(".HTML");
    $self->register_suffix(".HTM");
}

###########################################
sub type {
###########################################
    # Returns the type label of this plugin; always the string "html".
    # $target is accepted for interface symmetry but not consulted.
    my($self, $target) = @_;

    return "html";
}

###########################################
sub comments {
###########################################
    # Returns a reference to an array holding the text of every HTML
    # comment found in $target->{content}.
    my($self, $target) = @_;

    return $self->extract_html_comments($target);
}

###########################################
sub stripped {
###########################################
    # Returns the HTML of $target->{content} with comments removed.
    my($self, $target) = @_;

    return $self->strip_html_comments($target);
}

###########################################
sub extract_html_comments {
###########################################
    # Tokenizes $target->{content} with HTML::TokeParser and collects
    # the text of all comment tokens, minus the surrounding
    # "<!--" / "-->" delimiters.
    #
    # Returns: reference to an array of comment strings, in document
    #          order (empty array if there are no comments).
    my($self, $target) = @_;

    my @comments = ();

    my $stream = HTML::TokeParser->new(\$target->{content});

    while(my $token = $stream->get_token()) {
        # Only comment tokens ("C") are of interest
        next unless $token->[0] eq "C";

        # Token text still carries the comment delimiters; drop them
        my $text = $token->[1];
        $text =~ s/^<!--//;
        $text =~ s/-->$//;

        push @comments, $text;
    }

    return \@comments;
}

###########################################
sub strip_html_comments {
###########################################
    # Parses $target->{content} into an HTML::TreeBuilder tree and
    # serializes it back to HTML. The tree builder is used with its
    # default settings, under which comments are not kept in the tree
    # (see HTML::TreeBuilder's store_comments option), so the
    # serialized result is comment-free.
    #
    # Returns: the re-serialized HTML string. If the content cannot be
    #          parsed, a warning is logged and the original content is
    #          returned unchanged.
    my($self, $target) = @_;

    # Loaded lazily so the dependency is only needed when stripping
    require HTML::TreeBuilder;

    # Build and parse inside eval so a failing parser ends up in the
    # warn-and-fall-back branch below instead of being unreachable.
    my $root = eval {
        my $tree = HTML::TreeBuilder->new();
        $tree->parse($target->{content});
        # parse() must be followed by eof() to tell the parser that the
        # document is complete; otherwise trailing text can stay
        # buffered and be missing from the output.
        $tree->eof();
        $tree;
    };

    if(!$root) {
        WARN "Cannot parse $target->{path}";
        return $target->{content};
    }

    my $stripped_html = $root->as_HTML();

    # Release the tree explicitly; HTML::Tree versions without weak
    # references would otherwise leak it through circular references.
    $root->delete();

    # HTML::Element < 4 appends a newline to the HTML
    # for no apparent reason (CPAN RT#41739)
    $stripped_html =~ s/\n$//;

    return $stripped_html;
}

1;

__END__