# /usr/local/CPAN/Bryar/Template/Plugin/html2text.pm


package Template::Plugin::html2text;

use warnings;
use strict;

use Template::Plugin::Filter;
use base 'Template::Plugin::Filter';

use HTML::Parser;

# Conversion state shared between filter() and the HTML::Parser callbacks
# (tag_handler, text_handler). filter() resets all of it on every call.
my %inside;           # per-tag counter adjusted by tag_handler on start/end events
my (@links, @notes);  # collected href/src values and abbr titles, in order seen
my $text;             # plain-text output accumulated during a parse

# Filter entry point: converts an HTML fragment to plain text.
#
# Parameters:
#   $self    - the plugin object (not used by the conversion itself)
#   $text_in - HTML source to convert; undef is treated as an empty string
#
# Returns the plain-text rendering. Link and image targets collected while
# parsing are appended as "[N] target" lines, followed by abbreviation titles
# as "{N} title" lines.
#
# The conversion state lives in the file-level %inside, @links, @notes and
# $text variables, which are reset at the start of every call.
sub filter {
	my ($self, $text_in) = @_;
	$text_in = '' unless defined $text_in;

	my $p = HTML::Parser->new(
		api_version   => 3,
		report_tags => [qw(a img b i p pre ul li abbr table th tr td)],
		handlers	=> [
			start => [ \&tag_handler,  'self, tagname, attr, event, "+1"' ],
			end   => [ \&tag_handler,  'self, tagname, attr, event, "-1"' ],
			text  => [ \&text_handler, 'self, dtext' ],
		],
		marked_sections => 1,
	);

	# script/style are not in report_tags, so the handlers are never told
	# that the parser is inside one. Have the parser drop those elements
	# together with their content instead of passing it on as ordinary text.
	$p->ignore_elements(qw(script style));

	@links  = ();
	@notes  = ();
	%inside = ();
	$text   = '';

	$p->parse($text_in);
	# Signal end of input; otherwise text the parser is still buffering
	# (for instance trailing text not followed by a tag) is never reported.
	$p->eof;

	# Link table, framed by blank lines.
	if (@links) {
		$text .= "\n";
		for my $i (0 .. $#links) {
			$text .= "[$i] $links[$i]\n";
		}
		$text .= "\n";
	}

	# Abbreviation table, followed by a blank line.
	if (@notes) {
		for my $i (0 .. $#notes) {
			$text .= "{$i} $notes[$i]\n";
		}
		$text .= "\n";
	}

	return $text;
}

# Plugin initialisation: calls install_filter with the name found in the
# plugin configuration ($self->{_CONFIG}{name}), or 'html2text' when no
# (true) name is configured. Returns the plugin object.
sub init {
	my $self = shift;

	my $filter_name = $self->{_CONFIG}{name};
	$filter_name = 'html2text' unless $filter_name;

	$self->install_filter($filter_name);

	return $self;
}

##############################################################################
# HTML::Parser start/end callback: renders the reported tags as plain-text
# markup appended to the file-level $text buffer.
#
# Parameters (in the order of the argspecs registered in filter()):
#   $self  - the parser object (unused here)
#   $tag   - tag name
#   $attr  - attribute hash reference; only consulted for start events
#   $event - 'start' or 'end'
#   $num   - "+1" for start events, "-1" for end events
#
# Side effects: updates %inside, @links, @notes and $text.
sub tag_handler {
	my ($self, $tag, $attr, $event, $num) = @_;

	$inside{$tag} += $num;

	if ($tag eq 'a' or $tag eq 'abbr') {
		# Links are marked "[N]" and abbreviations "{N}", where N is the
		# position of the href/title in @links/@notes.
		my ($refs, $attr_name, $open, $close) = $tag eq 'a'
			? (\@links, 'href',  '[', ']')
			: (\@notes, 'title', '{', '}');

		# Each open element remembers the index it registered (undef when
		# it had no href/title) so that its closing tag prints its own
		# marker: nothing for an <a name=...>, and not the index of an
		# <img> that appeared inside the link. The stacks are kept in
		# %inside under a key that is not one of the reported tag names,
		# so they are discarded whenever %inside is reset.
		my $pending = $inside{_open_refs}{$tag} ||= [];

		if ($event eq 'start') {
			my $index;
			if (exists $attr->{$attr_name}) {
				push @$refs, $attr->{$attr_name};
				$index = $#$refs;
			}
			push @$pending, $index;
		} elsif ($event eq 'end') {
			# A stray closing tag pops nothing and prints nothing.
			my $index = pop @$pending;
			$text .= $open . $index . $close if defined $index;
		}
	} elsif ($tag eq 'img') {
		# Only the start tag produces output; a stray </img> is ignored.
		if ($event eq 'start') {
			$text .= '[' . ($attr->{alt} || 'IMG') . ']';
			# Print a reference only when this image actually added one.
			if (exists $attr->{src}) {
				push @links, $attr->{src};
				$text .= "[$#links]";
			}
		}
	} elsif ($tag eq 'b') {
		# Emitted for both the opening and the closing tag: *bold*
		$text .= '*';
	} elsif ($tag eq 'i') {
		# Emitted for both the opening and the closing tag: /italic/
		$text .= '/';
	} elsif ($tag eq 'p' or $tag eq 'pre') {
		$text .= "\n\n" if $event eq 'end';
	} elsif ($tag eq 'table') {
		$text .= "\n" if $event eq 'end';
	} elsif ($tag eq 'tr' or $tag eq 'th') {
		# NOTE(review): th ends a line like tr rather than adding a tab
		# like td -- confirm this is intended for header cells.
		$text .= "\n" if $event eq 'end';
	} elsif ($tag eq 'td') {
		$text .= "\t" if $event eq 'end';
	} elsif ($tag eq 'ul') {
		# The list container itself produces no output; see li.
	} elsif ($tag eq 'li') {
		$text .= "\n * " if $event eq 'start';
	} else {
		# Any other tag that reaches this handler is made visible in the
		# output with a debug marker.
		$text .= "{$tag:$num} [$event]";
	}
}

# HTML::Parser text callback: tidies one chunk of text and appends it to the
# file-level $text buffer.
#
# Parameters:
#   $parser - the parser object (unused here)
#   $chunk  - the text of the event (the 'dtext' argspec registered in filter())
sub text_handler {
	my ($parser, $chunk) = @_;

	# Nothing is emitted while a script or style element is recorded as open.
	for my $hidden (qw(script style)) {
		return if $inside{$hidden};
	}

	# Drop newlines at the very start of the chunk.
	$chunk =~ s/^\n+//g;

	# Outside <pre>, every run of whitespace collapses to a single space.
	unless ($inside{pre}) {
		$chunk =~ s/\s+/ /g;
	}

	# A chunk consisting only of whitespace contributes nothing.
	$chunk =~ s/^\s+$//;

	$text .= $chunk;
}

1;