| HTML-Template-XPath documentation | Contained in the HTML-Template-XPath distribution. |
HTML::Template::XPath - Easy access to XML files from HTML::Template using XPath
In your perl code:
my $xpt = new HTML::Template::XPath(default_lang => 'en',
root_dir => $root_dir
relaxed_parser => 'yes');
my $output = $xpt->process(xpt_filename => $xpt_filename,
xml_filename => $xml_filename,
lang => 'en');
# hash references containing filenames and mtimes, used by PageKit for
# caching
my $file_mtimes = $xpt->file_mtimes;
Your XPath template:
Header
<CONTENT_VAR NAME="document('foo.xml')/aaa/bbb">
<CONTENT_VAR NAME="/ddd/aaa/@bbb">
<CONTENT_VAR NAME="/ddd/eee">
<CONTENT_VAR NAME="eee">
<CONTENT_LOOP NAME="/ddd/fff">
<CONTENT_VAR NAME="@ttt">
<CONTENT_VAR NAME="ggg">
<CONTENT_VAR NAME="hhh"> <CONTENT_VAR NAME="hhh/@qqq">
<CONTENT_VAR NAME="iii">
</CONTENT_LOOP>
Footer
Your XML file:
<ddd>
<aaa bbb="ccc"/>
<eee>jjj</eee>
<fff ttt="uuu">
<ggg>sss</ggg>
<hhh qqq="rrr">lll</hhh>
<iii>mmm</iii>
</fff>
<fff ttt="vvv">
<ggg>nnn</ggg>
<hhh>ooo</hhh>
<iii>ppp</iii>
</fff>
</ddd>
Second XML file (foo.xml)
<aaa>
<bbb>Content from second XML file</bbb>
</aaa>
Output:
Header
Content from second XML file
ccc
jjj
jjj
uuu
sss
lll rrr
mmm
vvv
nnn
ooo
ppp
Footer
This is an easy to use templating system for extracting content from XML files. It is based on HTML::Template's <TMPL_VAR> and <TMPL_LOOP> tags and uses XML::LibXML's XPath function to extract the requested XML content.
It has built-in support for language localization.
Processes an XPath Template file and XML file in a specified language.
my $output = $xpt->process(xpt_filename => $xpt_filename, xml_filename => $xml_filename, lang => 'en'); my $output = $xpt->process(xpt_scalarref => $xpt_scalarref, xml_filename => $xml_filename, lang => 'en'); my $output = $xpt->process(xpt_scalarref => $xpt_scalarref, xml_text => $xml_text, lang => 'en');
In the third form, $xml_text should have no external XML file references, or the code is unlikely to work. Note that this has not been tested.
Processes the template and returns a hash reference containing language codes as keys and outputs as values.
my $lang_output = $xpt->process_all_lang(xpt_filename => $xpt_filename,
xml_filename => $xml_filename);
# english output
my $output_en = $lang_output->{'en'};
T.J. Mather (tjmather@tjmather.com)
If you use the same XML::LibXML query for a CONTENT_LOOP as well as a CONTENT_VAR tag, then HTML::Template will croak. A workaround is to append a "/." at the end of the xpath query.
Fixes, Bug Reports, Docs have been generously provided by:
Boris Zentner Tatsuhiko Miyagawa Matt Churchyard
Thanks!
Copyright (c) 2002 T.J. Mather. All rights Reserved.
This package is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| HTML-Template-XPath documentation | Contained in the HTML-Template-XPath distribution. |
package HTML::Template::XPath; use strict; use XML::LibXML; use HTML::Template; use IO::File; use IO::Handle; use Carp; use vars qw($VERSION); $VERSION = '0.20'; use constant XML_SOURCE_FILE => 1; use constant XML_SOURCE_TEXT => 2; use constant XML_SOURCE_LIBXML => 3; # these global vars are initialised and then they are readonly! # this is done here mainly for speed. use vars qw /$key_value_pattern/; # --------------------- $1 -------------------------- # $2 $3 $4 $5 $key_value_pattern = qr!(\s+(\w+)(?:\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|(\w+)))?)!; #" # public methods sub new { my ($class, @options) = @_; my $self = { @options }; bless $self, $class; $self->{'default_lang'} ||= 'en'; $self->{'relaxed_parser'} ||= 'no'; $self->{'template_class'} ||= 'HTML::Template'; return $self; } sub file_mtimes { return shift->{file_mtimes}; } sub process { my ($xpt, %opt) = @_; # clear out data from preview call to process delete $xpt->{file_mtimes}; delete $xpt->{lang}; my $xpt_template_ref; if($opt{xpt_filename}){ local($/) = undef; my $filename = "$xpt->{root_dir}/$opt{xpt_filename}"; my $xpt_handle = IO::File->new($filename) or die "can't open $filename for reading"; my $xpt_template = <$xpt_handle>; $xpt_template_ref = \$xpt_template; $xpt_handle->close; } elsif ($opt{xpt_scalarref}){ $xpt_template_ref = $opt{xpt_scalarref}; } $opt{lang} ||= $xpt->{default_lang}; if ($xpt->{relaxed_parser} eq 'yes') { # new experimental parser # see comments in PageKit::View::_preparse_model_tags # remove unneeded tags $$xpt_template_ref =~ s^<(!--)?\s*/CONTENT_(?:VAR|ELSE)\s*(?(1)--)>^^sig; # translate all content end tags to tmpl tags $$xpt_template_ref =~ s^<(!--)?\s*/CONTENT_(\w+)\s*(?(1)--)>^</TMPL_$2>^sig; $$xpt_template_ref =~ s^<(!--)?\s*CONTENT_(\w+(?:$key_value_pattern)*)\s*/?(?(1)--)>^<TMPL_$2>^sig; } else { # remove unneeded tags $$xpt_template_ref =~ s^</CONTENT_(?:VAR|ELSE)>^^ig; # translate all content end tags to tmpl tags $$xpt_template_ref =~ s^</CONTENT_(\w+)>^</TMPL_$1>^ig; $$xpt_template_ref =~ s^<CONTENT_(\w+(?:$key_value_pattern)*)/?>^<TMPL_$1>^ig; } $opt{xml_filename} and $xpt->{_xml_source} = XML_SOURCE_FILE; $opt{xml_text} and $xpt->{_xml_source} = XML_SOURCE_TEXT; # not implemented yet.. # $opt{xml_parser} and $xpt->{_xml_source} = XML_SOURCE_LIBXML; $opt{xml_filename} ||= $opt{xml_text}; die "No XML source - expected filename, text, or parser" unless $xpt->{_xml_source}; $xpt->_fill_in_content($xpt_template_ref, $opt{xml_filename}, $opt{lang}, $opt{check_for_other_lang}); return $xpt->{lang}->{$opt{lang}}; } sub process_all_lang { my ($xpt, %opt) = @_; $opt{check_for_other_lang} = 1; $xpt->process(%opt); return $xpt->{lang}; } # private methods sub _add_content_mtime { my ($xpt, $xml_filename) = @_; if ($xpt->{_xml_source} == XML_SOURCE_FILE) { my $filename = "$xpt->{root_dir}/$xml_filename"; return if exists $xpt->{file_mtimes}->{$filename}; my $mtime = (stat($filename))[9]; $xpt->{file_mtimes}->{$filename} = $mtime; } else { # Hrm.. use some sort of hashing of the actual text here? $xpt->{file_mtimes}->{_text} = time(); } } sub _fill_in_content { my ($xpt, $xpt_template_ref, $default_xml_filename, $lang, $check_for_other_lang) = @_; $xpt->{language_parsed}->{$lang} = 1; my $tmpl; eval { $tmpl = $xpt->{template_class}->new(scalarref => $xpt_template_ref, # don't die when we set a parameter that is not in the template die_on_bad_params=>0, # built in __FIRST__, __LAST__, etc vars loop_context_vars=>1, case_sensitive=>1, max_includes => 50); }; if($@){ die "Can't load template (preprocessing): $@"; } my @params = $tmpl->query; for my $name (@params){ # next unless $name =~ m!^pkit_content::!; my $type = $tmpl->query(name => $name); my ($xml_filename, $xpath) = $xpt->_get_document_xpath($name,$default_xml_filename); $xpt->_add_content_mtime($xml_filename); my $value; if($type eq 'LOOP'){ $value = $xpt->_fill_in_content_loop($xpt_template_ref, $default_xml_filename, $tmpl, $xml_filename, $lang, [ $name ], $check_for_other_lang); } else { if($check_for_other_lang){ my $langs = $xpt->_get_xpath_langs(xml_filename => $xml_filename, xpath => $xpath); for my $l (@$langs){ $xpt->_fill_in_content($xpt_template_ref, $default_xml_filename, $l, 0) unless exists $xpt->{language_parsed}->{$l}; } } my $nodeset = $xpt->_get_xpath_nodeset(xml_filename => $xml_filename, xpath => $xpath, lang => $lang); # get value of first node $value = $nodeset->string_value; } $tmpl->param($name => $value); } # html, filtered for content $xpt->{lang}->{$lang} = \$tmpl->output; } sub _fill_in_content_loop { my ($xpt, $xpt_template_ref, $default_xml_filename, $tmpl, $context_xml_filename, $lang, $loops, $check_for_other_lang, $context) = @_; my ($xpath) = ($xpt->_get_document_xpath($loops->[-1],$default_xml_filename))[1]; my @inner_param_names = $tmpl->query(loop => $loops); my %inner_param; for my $name (@inner_param_names){ next if $name =~ m!^__(inner|last|odd|first)__$!; my ($xml_filename, $xpath) = $xpt->_get_document_xpath($name,$default_xml_filename); $xpt->_add_content_mtime($xml_filename); $inner_param{$name} = {type => $tmpl->query(name => [ @$loops, $name ]), xml_filename => $xml_filename, xpath => $xpath}; } my $nodeset = $xpt->_get_xpath_nodeset(xml_filename => $context_xml_filename, xpath => $xpath, lang => $lang, context => $context); my $array_ref = []; for my $node ($nodeset->get_nodelist){ my $loop_param = {}; while (my ($name, $hash_ref) = each %inner_param){ my $value; my $context = $node; if($hash_ref->{type} eq 'LOOP'){ $value = $xpt->_fill_in_content_loop($xpt_template_ref, $default_xml_filename, $tmpl, $hash_ref->{xml_filename}, $lang, [ @$loops, $name], $check_for_other_lang, $node); } else { if($check_for_other_lang){ my $langs = $xpt->_get_xpath_langs(xml_filename => $hash_ref->{xml_filename}, xpath => $hash_ref->{xpath}, context => $context); for my $l (@$langs){ $xpt->_fill_in_content($xpt_template_ref, $default_xml_filename, $l, 0) unless exists $xpt->{language_parsed}->{$l}; } } my $nodeset = $xpt->_get_xpath_nodeset(xml_filename => $hash_ref->{xml_filename}, xpath => $hash_ref->{xpath}, lang => $lang, context => $context); # get value of first node $value = $nodeset->string_value; } $loop_param->{"$name"} = $value; } push @$array_ref, $loop_param; } return $array_ref; } sub _get_document_xpath { my ($xpt, $name, $default_xml_filename) = @_; my ($xml_filename, $xpath); if($name =~ m!^document\('?(.*?)'?\)(.*)$!){ ($xml_filename, $xpath) = ($1, $2); unless($xml_filename =~ s!^/!!){ # return relative to $default_xml_filename (my $default_xml_dir = $default_xml_filename) =~ s![^/]*$!!; $xml_filename = "$default_xml_dir$xml_filename"; while ($xml_filename =~ s![^/]*/\.\./!!) {}; } } else { ($xml_filename, $xpath) = ($default_xml_filename, $name); } return ($xml_filename, $xpath); } sub _get_xp { my ($xpt, $xml_filename, $context) = @_; if ( $context ) { return $context; } elsif(exists $xpt->{xp}->{_hash_or_file($xpt->{_xml_source}, $xml_filename)}){ return $xpt->{xp}->{_hash_or_file($xpt->{_xml_source}, $xml_filename)}; } my $xp; if ($xpt->{_xml_source} == XML_SOURCE_FILE) { my $filename = "$xpt->{root_dir}/$xml_filename"; unless( -f $filename ) { warn "Can't load content file $filename"; return; } my $parser = XML::LibXML->new; my $xpt_handle = IO::File->new("<$filename") or die "can not open $filename"; $xp = $parser->parse_fh($xpt_handle); $xpt_handle->close; # get default context (root XML element) $xpt->{root_element_node}->{_hash_or_file($xpt->{_xml_source}, $xml_filename)} = $xp->documentElement; $xpt->{xp}->{_hash_or_file($xpt->{_xml_source}, $xml_filename)} = $xp; } elsif ($xpt->{_xml_source} == XML_SOURCE_TEXT) { my $parser = XML::LibXML->new; $xp = $parser->parse_string($xml_filename); $xpt->{root_element_node}->{_hash_or_file($xpt->{_xml_source}, $xml_filename)} = $xp->documentElement; $xpt->{xp}->{_hash_or_file($xpt->{_xml_source}, $xml_filename)} = $xp; } return $xp; } sub _get_xpath_langs { my ($xpt, %arg) = @_; my $xml_filename = $arg{xml_filename}; my $context = $arg{context} || $xpt->{root_element_node}->{_hash_or_file($xpt->{_xml_source}, $xml_filename)}; my $xp = $xpt->_get_xp($xml_filename, $context); return [] unless $xp; my $xpath = $arg{xpath}; $context ||= $xpt->{root_element_node}->{_hash_or_file($xpt->{_xml_source}, $xml_filename)}; my $nodeset = $context->findnodes($xpath); my %lang; my $return_nodeset = XML::LibXML::NodeList->new; for my $node ($nodeset->get_nodelist) { my $nodeset = $node->findnodes(q{ancestor-or-self::*[@xml:lang]}); for my $node ($nodeset->get_nodelist) { my $lang = $node->getAttributeNS('http://www.w3.org/XML/1998/namespace','lang'); $lang{$lang} = 1; } $return_nodeset->push($node) if $nodeset->size > 0; } my @lang = keys %lang; return \@lang; } sub _get_xpath_nodeset { my ($xpt, %arg) = @_; my $xml_filename = $arg{xml_filename}; my $return_nodeset = XML::LibXML::NodeList->new; my $context = $arg{context}; my $xp = $xpt->_get_xp($xml_filename, $context); return $return_nodeset unless $xp; my $xpath = $arg{xpath}; my $lang = $arg{lang}; $context ||= $xpt->{root_element_node}->{_hash_or_file($xpt->{_xml_source}, $xml_filename)}; my $nodeset = $context->find($xpath); my @nodelist = $nodeset->get_nodelist; # first attempt get nodes whose ancestor-or-self[@xml:lang] eq $lang for my $node (@nodelist) { # lifted from XPath::Function::lang my $node_lang = $node->findvalue('(ancestor-or-self::*[@xml:lang]/@xml:lang)[last()]') || $xpt->{default_lang}; if (substr(lc($node_lang), 0, length($lang)) eq $lang) { $return_nodeset->push($node); } } return $return_nodeset if $return_nodeset->size > 0; # If no nodes are found in the preferred language, then return # node(s) which are in the default language for my $node (@nodelist) { my $node_lang = $node->findvalue('(ancestor-or-self::*[@xml:lang]/@xml:lang)[last()]') || $xpt->{default_lang}; if (substr(lc($node_lang), 0, length($xpt->{default_lang})) eq $xpt->{default_lang}) { $return_nodeset->push($node); } } return $return_nodeset if $return_nodeset->size > 0; # pass 3, just return all the nodes # (even thought it's not in the right language) # this is undocumented and subject to change! return $nodeset; } #============================================================ # Returns the filename or a simple hash of the text. #============================================================ sub _hash_or_file { my ($type, $data) = @_; return $data if $type == XML_SOURCE_FILE; use Digest::MD5 qw(md5_base64); return md5_base64($data); } 1; __END__