HTML::LinkAdd - Add hyperlinks to phrases in HTML documents


HTML-LinkAdd documentation Contained in the HTML-LinkAdd distribution.

Index


Code Index:

NAME

Top

HTML::LinkAdd - Add hyperlinks to phrases in HTML documents

SYNOPSIS

Top

	use HTML::LinkAdd;
	my $page = new HTML::LinkAdd(
		'testinput1.html', {
			'the clocks were striking thirteen'=>'footnotes.html#OrwellG-1',
			'updated' => ['updated.html', 'View the latest update],
	});
	warn $page -> hyperlinked;
	$page ->save ('output.html');

DESCRIPTION

Top

A simple object that accepts a class reference, a path to a file, and a hash of text-phrase/link-URLs, and supplies a method to obtain the HTML with supplied hyperlinks interpolated.

If the values of the supplied has are anonymous lists, the first value should a URI, the second escaped text to place in the link's title attribute.

The phrase to hyperlink will be skipped if it appears in a context that prevents linking, as defined in %$HTML::LinkAdd::SKIP. This is currently head, script, style>, pre, xmp, textarea, object, and a.

DEPENDENCIES

Top

HTML::TokeParser

CONSTRUCTOR (new)

Top

Accepts class reference, followed by either a filename or reference to a scalar of HTML (as HTML::TokeParser, and a hash of phrases and hyperlinks.

Returns a scalar that is the updated HTML.

PUBLIC METHOD hyperlink

Top

PUBLIC METHOD save

Top

Convenience method to save the object's output slot to filename passed as scalar.

Returns undef on failure, 1 on success.

SEE ALSO

Top

HTML::TokeParse.

TODO

Top

Add support for linking images by source or ID.

AUTHOR

Top

Lee Goddard lgoddard@cpan.org

COPYRIGHT

Top


HTML-LinkAdd documentation Contained in the HTML-LinkAdd distribution.
package HTML::LinkAdd;
our $VERSION = 0.13;	# POD and link titles

use strict;
use warnings;
use HTML::TokeParser;

our $SKIP = { map {$_=>1} qw{
	head pre xmp textarea object a script style
} };

sub new { 
	my ($class,$input) = (shift,shift);
	
	# Lets HTML::TokeParser handle the input file/string checks:-
	warn "HTML::LinkAdd::new called without a class ref?" and return undef unless defined $class;
	warn "Useage: new $class (\$path_to_file or \\\$HTML)" and return undef if not defined $input;

	my $self = bless {
		INPUT => $input,
		HREFS => {},
		output => '',
		skipto => [],
	},$class;

	my %args = ref($_[0]) eq 'HASH'? %{$_[0]} : @_; 
	warn "new requires a hash (or ref to such) as parameter." and return undef if not scalar keys %args;
	
	foreach my $phrase (keys %args){
		my $clean = $phrase;
		$clean =~ s{\s}{ }; # Squash whitespace in the phrase
		$self->{HREFS}->{$clean} = $args{$phrase};
	}
	
	# Create new TokeParser and parse all text, comparing HTML against keys of our targets
	my $p = new HTML::TokeParser ( $self->{INPUT} )
		or warn "Counldn't instantiate HTML::TokeParser!\n$!" and return undef;
	my $token;

	while ($token = $p->get_token and not (@$token[1] eq 'html' and @$token[0] eq 'E') ){
		
		 # warn "@$token[0] @$token[1] - [",  (scalar @{ $self->{skipto} }? join(', ', @{ $self->{skipto} }) : ''), "]\n";

		if (@$token[0] eq 'T'				# Text token
			and not @{ $self->{skipto} }	# and not ignoreing head/pre, etc
		) {
		
			@$token[1] =~ s{\s+}{ };		# Squash whitespace in the text

			# If we got a text node, loop over every user-supplied phrase
			foreach my $key ( keys %{$self->{HREFS}} ) {
				if (@$token[1] =~ m/\Q$key\E/sg){
					my ($title, $href);
					if (ref $self->{HREFS}->{$key}){
						($href, $title) = @{ $self->{HREFS}->{$key} };
					}
					else {
						$href = $self->{HREFS}->{$key};	
					}
					my $subs = "<a href=\"$href\""
					. ($title? " title=\"$title\"" : '')
					. ">$key</a>";
					@$token[1] =~ s/\Q$key\E/$subs/sg;
				}
			}
		};

		my $literal;
		if (@$token[0] eq 'S') { 
			$literal = @$token[4]; 
			# Skip PRE and XMP and TEXTAREA and HEAD
			if (exists $SKIP->{ @$token[1] }){
				unshift @{$self->{skipto}}, @$token[1]; 
			}
		}
		elsif (@$token[0] eq 'E') { 
			$literal = @$token[2];
			if (@{ $self->{skipto} }
			and @$token[1] eq $self->{skipto}->[0]){
				shift @{$self->{skipto}};
			}
		}
		else {
			$literal = @$token[1];
		}
		
		$self->{output} .= $literal;
	} 
	
	return $self;
}


sub hyperlinked { return $_[0]->{output} }


sub save { my ($self,$filename) = (shift,shift);
	warn "HTML::LinkAdd::save requires a filename as parameter 1" and return undef unless defined $filename;
	local *OUT;
	open OUT, ">$filename"
		or warn "HTML::LinkAdd::save could not open the file <$filename> for writing.\n$!" and return undef;
		print OUT $self->{output};
	close OUT;
	return 1;
}

1;	# Return cleanly


__END__;