| HTML-LinkChanger documentation | Contained in the HTML-LinkChanger distribution. |
HTML::LinkChanger - abstract Perl class to change all linking URLs in HTML.
BEGIN {
package Http2Ftp;
require HTML::LinkChanger;
use vars qw(@ISA);
@ISA = qw(HTML::LinkChanger);
#
# Converting http URLs to FTP urls
#
sub change_url
{
my $self = shift;
my $url = shift;
$url=~s/^http:/ftp:/;
return $url;
}
}
my $http2ftp = new Http2Ftp();
my $converted_HTML = $http2ftp->filter($original_HTML);
HTML::LinkChanger is an abstract class so you need to subclass it to make it do something. See HTML::LinkChanger::Absolutizer for one useful example of such class.
HTML::LinkChanger uses HTML::Tagset::linkElements to change all attributes that contain links that needs to be updated.
This class is a subclass of HTML::Parser. You can call filter() method to convert scalar containing HTML or filter_file() method to convert HTML from file.
You can also call conventional HTML::Parser's parse() and parse_file() methods and call filtered_html() after that to retreive results.
Sergey Chernyshev <sergeyche@cpan.org>
HTML::Parser, HTML::Tagset, HTML::LinkChanger::Absolutizer
| HTML-LinkChanger documentation | Contained in the HTML-LinkChanger distribution. |
package HTML::LinkChanger; # Version: $Id: LinkChanger.pm 4 2007-10-05 15:51:37Z sergey.chernyshev $ use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); require Exporter; require AutoLoader; @ISA = qw(Exporter AutoLoader HTML::Parser); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. @EXPORT = qw( ); $VERSION = sprintf("2.%d", q$Rev: 4 $ =~ /(\d+)/); # Preloaded methods go here. # Autoload methods go after =cut, and are processed by the autosplit program. require HTML::Parser; require HTML::Tagset; sub new { my $class = shift; my %args = @_; my $url_filters = $args{'url_filters'}; # reference to array HTML::LinkChanger::URLFilter objects # or reference to one such object my $self = $class->SUPER::new( api_version => 3, default_h => [sub { my $self = shift; $self->{_filtered_html} .= shift }, 'self,text'], start_h => ['link_tag_start', 'self,tagname,text,attr,attrseq'], ); # initializing transforming functions array if (ref($url_filters) eq 'ARRAY') { foreach (@{$url_filters}) { die "Array must contain only HTML::LinkChanger::URLFilter objects" unless UNIVERSAL::isa($_, 'HTML::LinkChanger::URLFilter'); } $self->{url_filters} = $url_filters; } elsif (UNIVERSAL::isa($url_filters, 'HTML::LinkChanger::URLFilter')) { $self->{url_filters} = [$url_filters]; } else { $self->{url_filters} = []; # empty array - can add more filters later } $self; } sub link_tag_start { my($self, $tag, $text, $attr, $attrseq) = @_; my $link_attrs = $HTML::Tagset::linkElements{$tag}; if ($link_attrs) { $link_attrs = [$link_attrs] unless ref $link_attrs; for my $link_attr (@$link_attrs) { next unless exists $attr->{$link_attr}; $attr->{$link_attr} = $self->change_url( $attr->{$link_attr}, $tag, $link_attr ); } my $output='<'.$tag; foreach my $attribute (@$attrseq) { $output.=' '.$attribute.'="'.$attr->{$attribute}.'"'; } $output.='>'; $self->{_filtered_html} .= $output; } else { $self->{_filtered_html} .= $text; } } sub filter { my $self = shift; delete $self->{_filtered_html}; $self->parse(@_); $self->eof; return $self->{_filtered_html}; } sub filter_file { my $self = shift; delete $self->{_filtered_html}; $self->parse_file(@_); $self->eof; return $self->{_filtered_html}; } sub filtered_html { my $self = shift; return $self->{_filtered_html}; } sub change_url { my $self = shift; my $url = shift; # url of the link my $tag = shift; # tag containing a link to change my $attr = shift; # attribute containing a link to change foreach my $filter (@{$self->{url_filters}}) { $url = $filter->url_filter( url => $url, tag => $tag, attr => $attr ); } return $url; # abstract class just keeps everything as it is } 1; __END__