| YAPE-HTML documentation | Contained in the YAPE-HTML distribution. |
YAPE MODULESYAPE::HTML::ElementYAPE::HTML::opentagmy $tag = YAPE::HTML::opentag->new($name, $attr, $text, $closed, $impl);my $str = $tag->string;my $str = $tag->fullstring($exclude, $depth);my $attr = $tag->get_attr($name);my @attrs = $tag->get_attr(@names);my %attrs = $tag->get_attr;my $attr = $tag->has_attr($name);my @attrs = $tag->has_attr(@names);$tag->set_attr(%pairs);$tag->rem_attr(@names);my $closed = $tag->closed;my $impl = $tag->implied_closed;my $tagname = $tag->tag;
YAPE::HTML::closetagYAPE::HTML::textYAPE::HTML::commentYAPE::HTML::dtdYAPE::HTML::pimy $pi = YAPE::HTML::pi->new($name, $attr);my $str = $pi->string;my $attr = $pi->get_attr($name);my @attrs = $pi->get_attr(@names);my %attrs = $pi->get_attr;my $attr = $pi->has_attr($name);my @attrs = $pi->has_attr(@names);$pi->set_attr(%pairs);$pi->rem_attr(@names);my $name = $pi->name;
YAPE::HTML::ssimy $ssi = YAPE::HTML::ssi->new($name, $attr);my $str = $ssi->string;my $attr = $ssi->get_attr($name);my @attrs = $ssi->get_attr(@names);my %attrs = $ssi->get_attr;my $attr = $ssi->has_attr($name);my @attrs = $ssi->has_attr(@names);$ssi->set_attr(%pairs);$ssi->rem_attr(@names);my $command = $ssi->command;
YAPE::HTML::Element - sub-classes for YAPE::HTML elements
use YAPE::HTML 'MyExt::Mod'; # this sets up inheritance in MyExt::Mod # see YAPE::HTML documentation
YAPE MODULESThe YAPE hierarchy of modules is an attempt at a unified means of parsing
and extracting content. It attempts to maintain a generic interface, to
promote simplicity and reusability. The API is powerful, yet simple. The
modules do tokenization (which can be intercepted) and build trees, so that
extraction of specific nodes is doable.
This module provides the classes for the YAPE::HTML objects. The base class
for these objects is YAPE::HTML::Element; the seven object classes are
YAPE::HTML::opentag, YAPE::HTML::closetag, YAPE::HTML::text,
YAPE::HTML::comment, YAPE::HTML::dtd, YAPE::HTML::pi, and YAPE::HTML::ssi.
YAPE::HTML::ElementThis class contains fallback methods for the other classes.
my $content = $obj->text;Returns an array reference of objects between an open and close tag, or a string
of plain text for a block of text or a comment. This method merely returns the
TEXT value in the object hash. This returns undef for dtd, pi, and
ssi objects.
my $string = $obj->string;Returns a string representing the single object (for tags, this does not include
the elements found in between the open and close tag). This method merely calls
the object's text method.
my $complete = $obj->fullstring;Returns a string representing the object (and all objects found within it, in
the case of a tag). This method merely calls the object's string method.
my $type = $obj->type;Returns the type of the object: tag, closetag, text, comment, dtd, pi, or ssi.
YAPE::HTML::opentagThis class represents tags. Object has the following methods:
my $tag = YAPE::HTML::opentag->new($name, $attr, $text, $closed, $impl);Creates a YAPE::HTML::opentag object. Takes five arguments: the name of the
HTML element, a hash reference of attribute-value pairs, an array reference of
objects to be included in between the open and closing tags, whether the tag is
explicitly closed or not, and whether the tag is implicitly closed or not. The
attribute hash reference must have the keys in lowercase text.
my $attr = { src => 'foo.png', alt => 'foo' };
my $img = YAPE::HTML::opentag->new('img', $attr, [], 0, 1);
my $text = [ YAPE::HTML::text->new("Bar!"), $img ];
my $name = YAPE::HTML::opentag->new('a', { name => 'foo' }, $text, 1);
my $str = $tag->string;Creates a string representation of the tag only. This means the tag, and any attributes of the tag only. No closing tag (if any) is returned.
print $img->string; # <img alt="foo" src="foo.png" /> print $name->string; # <a name="foo">
my $str = $tag->fullstring($exclude, $depth);Creates a string representation of the tag, the content enclosed between the open
and closing tags, and the closing tag (if applicable). The method can take two
arguments: an array reference of tag names not to render, and the depth with
which to render tags. The $exclude defaults to none, and $depth defaults
to -1, which means there is no depth limit.
print $img->fullstring; # <img alt="foo" src="foo.png" /> print $name->fullstring; # <a name="foo">Bar!<img alt="foo" src="foo.png" /></a> print $name->fullstring(0); # Bar! print $name->fullstring(['img']); # <a name="foo">Bar!</a> print $name->fullstring(1); # <a name="foo">Bar!</a>
my $attr = $tag->get_attr($name);my @attrs = $tag->get_attr(@names);my %attrs = $tag->get_attr;Fetches any number of attribute values from a tag. Note: tags which contain
attributes with no value have a value of undef returned for that attribute --
this is indistinguishable from the undef returned for a tag that does not have
an attribute. This is on the list of things to be fixed. In the meantime, use
the has_attr method beforehand.
print $name->get_attr('name');
# 'foo'
my %list = $img->get_attr;
# alt => 'foo', src => 'foo.png'
my $attr = $tag->has_attr($name);my @attrs = $tag->has_attr(@names);Returns 1 or "" depending on the existence of the attribute in the tag.
my @on = $name->has_attr(qw( name href )); # (1, "")
$tag->set_attr(%pairs);Sets a list of attributes to the associated values for the tag.
$img->set_attr( width => 40, height => 16 );
$tag->rem_attr(@names);Removes (and returns) the specified attributes from a tag. See the caveat above
for the get_attr method about undef values.
my $src = $img->rem_attr('src');
my $closed = $tag->closed;Returns 1 or 0, depending on whether or not the tag is closed. This means
it has a closing tag -- tags like <hr /> are not closed.
my $impl = $tag->implied_closed;Returns 1 or 0, depending on whether or not the tag is implicitly closed
with a / at the end of the tag (like <hr />).
my $tagname = $tag->tag;Returns the name of the HTML element.
print $name->tag; # 'a'
YAPE::HTML::closetagThis class represents closing tags. Object has the following methods:
my $tag = YAPE::HTML::closetag->new($name);Creates a YAPE::HTML::closetag object. Takes one argument: the name of the
HTML element. These objects are never included in the HTML tree structure, since
the parser uses the CLOSED attribute of an opentag object to figure out if
there needs to be a closing tag. However, they are returned in the parsing stage
so that you know when they've been reached.
my $close = YAPE::HTML::closetag->new('a');
my $str = $tag->string;Creates a string representation of the closing tag.
print $close->string; # '</a>'
my $tagname = $tag->tag;Returns the name of the HTML element.
print $close->tag; # 'a'
YAPE::HTML::textThis class represents blocks of plain text. Objects have the following methods:
my $text = YAPE::HTML::text->new($content);Creates a YAPE::HTML::text object. Takes one argument: the text of the block.
my $para = YAPE::HTML::text->new(<< "END"); Perl is not an acronym -- rather "Practical Extraction and Report Language" was developed after the fact. END
YAPE::HTML::commentThis class represents comments. Objects have the following methods:
my $comment = YAPE::HTML::comment->new($content);Creates a YAPE::HTML::comment object. Takes one argument: the text of the
comment.
my $todo = YAPE::HTML::comment->new(<< "END"); This table should be formatted differently. END
my $str = $comment->string;Creates a string representation of the comment, with <!-- before it, and
--> after it.
print $todo->string; # <!--This table should be formatted differently-->
YAPE::HTML::dtdThis class represents <!DOCTYPE> tags. Objects have the following
methods:
my $dtd = YAPE::HTML::dtd->new(\@fields);Creates a YAPE::HTML::dtd object. Takes one argument: an array reference of
the four fields (should be two unquoted strings, and two quoted strings (?)).
my $dtd = YAPE::HTML::dtd->new([
'HTML',
'PUBLIC',
'"-//W3C//DTD HTML 4.01//EN"',
'"http://www.w3.org/TR/html4/strict.dtd"'
]);
my $str = $dtd->string;Creates a string representation of the DTD.
print $dtd->string; # (line breaks added for readability) # <!DOCTYPE HTML PUBLIC # "-//W3C//DTD HTML 4.01//EN" # "http://www.w3.org/TR/html4/strict.dtd">
my @attrs = $dtd->get_attr;Returns the four attributes of the DTD.
$dtd->set_attr(@attrs);Sets the four attributes of the DTD (can't be done piecemeal).
YAPE::HTML::piThis class represents processing instruction tags. Objects have the following methods:
my $pi = YAPE::HTML::pi->new($name, $attr);Creates a YAPE::HTML::pi object. Takes two arguments: the name of the
processing instruction, and a hash reference of attribute-value pairs. The
attribute hash reference must have the keys in lowercase text.
my $attr = { order => 'alphabetical', need => 'examples' };
my $pi = YAPE::HTML::pi->new(sample => $attr);
my $str = $pi->string;Creates a string representation of the processing instruction.
print $pi->string; # <?sample need="examples" order="alphabetical"?>
my $attr = $pi->get_attr($name);my @attrs = $pi->get_attr(@names);my %attrs = $pi->get_attr;my $attr = $pi->has_attr($name);my @attrs = $pi->has_attr(@names);$pi->set_attr(%pairs);$pi->rem_attr(@names);See the identical methods for opentag objects above.
my $name = $pi->name;Returns the name of the processing instruction.
print $pi->name; # 'sample'
YAPE::HTML::ssiThis class represents server-side includes. Objects have the following methods:
my $ssi = YAPE::HTML::ssi->new($name, $attr);Creates a YAPE::HTML::ssi object. Takes two arguments: the SSI command, and
a hash reference of attribute-value pairs. The attribute hash reference must
have the keys in lowercase text.
my $attr = { var => 'REMOTE_HOST' };
my $ssi = YAPE::HTML::ssi->new(echo => $attr);
my $str = $ssi->string;Creates a string representation of the server-side include.
print $ssi->string; # <!--#echo var="REMOTE_HOST"-->
my $attr = $ssi->get_attr($name);my @attrs = $ssi->get_attr(@names);my %attrs = $ssi->get_attr;my $attr = $ssi->has_attr($name);my @attrs = $ssi->has_attr(@names);$ssi->set_attr(%pairs);$ssi->rem_attr(@names);See the identical methods for opentag objects above.
my $command = $ssi->command;Returns the SSI command's name.
print $ssi->command; # 'echo'
The <script> and <xmp> tags are given special treatment.
When they are encountered, all text up to the first occurrence of the appropriate
closing tag is taken as plain text.
Tag attributes are displayed in the default sort() order.
This is a listing of things to add to future versions of this module.
if, elif, and else: These need to contain content, since the text between them is associated with a given condition.
Following is a list of known or reported bugs.
Visit YAPE's web site at http://www.pobox.com/~japhy/YAPE/.
The YAPE::HTML::Element documentation, for information on the node classes.
Jeff "japhy" Pinyan CPAN ID: PINYAN japhy@pobox.com http://www.pobox.com/~japhy/
| YAPE-HTML documentation | Contained in the YAPE-HTML distribution. |
package YAPE::HTML::Element;

use strict;
use warnings;

our $VERSION = '1.10';

# Fallback methods shared by the node classes defined below.
#
# Nothing in this file sets up @ISA for the node classes, so the inheritance
# from YAPE::HTML::Element has to be established by whoever loads them.

# Returns the TEXT slot of the object: an array reference of child objects
# for a tag, a plain string for text and comment objects, and undef for
# objects that never set TEXT (dtd, pi, ssi).
sub text { return $_[0]{TEXT} }

# Default single-object rendering: whatever text() returns.
sub string { return $_[0]->text }

# Default full rendering: whatever string() returns.  Any arguments are
# ignored at this level.
sub fullstring { return $_[0]->string }

# Returns the TYPE slot set by the constructor:
# 'tag', 'closetag', 'text', 'comment', 'dtd', 'pi' or 'ssi'.
sub type { return $_[0]{TYPE} }

# --- private helpers shared by the tag, pi and ssi classes ----------------
# They are called as plain functions with the attribute hash reference as
# the first argument, so they do not depend on inheritance being set up.

# Renders an attribute hash as ' name="value"' pairs in sort() order.
# Attributes whose value is undef are rendered as a bare name.
# Quoting is delegated to YAPE::HTML::quote, which is defined outside this
# file.
sub _attr_string {
    my ($attr) = @_;
    my $out = '';
    for my $name (sort keys %$attr) {
        $out .= " $name";
        $out .= '=' . YAPE::HTML::quote($attr->{$name})
            if defined $attr->{$name};
    }
    return $out;
}

# No names: the whole attribute hash as a key/value list.
# One name:  that attribute's value.
# Several:   the list of values, in the order requested.
# Names are lowercased before lookup in every form.  (The single-name form
# previously skipped the lowercasing, unlike every other accessor.)
sub _get_attr {
    my $attr = shift;
    return %$attr if not @_;
    return $attr->{ lc $_[0] } if @_ == 1;
    return @{$attr}{ map { lc $_ } @_ };
}

# Returns 1 or "" per requested name, depending on whether the attribute
# exists.  Names are lowercased before lookup.
sub _has_attr {
    my $attr = shift;
    return exists $attr->{ lc $_[0] } if @_ == 1;
    return map { exists $attr->{ lc $_ } } @_;
}

# Stores name => value pairs, lowercasing each name.  The whole argument
# list is consumed, so a false-but-valid name such as "0" no longer stops
# the loop and drops the remaining pairs.  Pairs whose name is undef or
# empty are skipped.
sub _set_attr {
    my $attr = shift;
    while (@_) {
        my ($name, $value) = splice @_, 0, 2;
        next unless defined $name and length $name;
        $attr->{ lc $name } = $value;
    }
    return;
}

# Deletes the named attributes (lowercased) and returns their former values.
sub _rem_attr {
    my $attr = shift;
    return delete @{$attr}{ map { lc $_ } @_ };
}


package YAPE::HTML::tag;

# NOTE(review): the accompanying documentation refers to this class as
# YAPE::HTML::opentag, while the package declared here is YAPE::HTML::tag.
# Confirm which name callers are expected to use.

# Constructor.
#   $tag    - element name
#   $attr   - hash reference of attributes (defaults to {})
#   $text   - array reference of child objects (defaults to [])
#   $closed - true if the element has a closing tag (defaults to 0)
#   $impl   - true if the tag is written with a trailing " /" (defaults to 0)
sub new {
    my ($class, $tag, $attr, $text, $closed, $impl) = @_;
    return bless {
        TYPE    => 'tag',
        TAG     => $tag,
        ATTR    => $attr   || {},
        TEXT    => $text   || [],
        CLOSED  => $closed || 0,
        IMPLIED => $impl   || 0,
    }, $class;
}

# Renders the opening tag only: name, attributes in sort() order, and a
# trailing " /" when IMPLIED is set.
sub string {
    my $self = shift;
    my $str = "<$self->{TAG}"
            . YAPE::HTML::Element::_attr_string($self->{ATTR});
    $str .= " /" if $self->{IMPLIED};
    return "$str>";
}

# Renders the tag, its children, and the closing tag (when CLOSED is set).
#   $exclude - optional array reference of tag names whose open/close tags
#              are not rendered (their children still are)
#   $depth   - optional depth limit; defaults to -1, meaning no limit.  At
#              depth 0 the tag itself is suppressed but its non-tag children
#              are still rendered.
sub fullstring {
    my $self = shift;

    # The exclude list is recognised by being a reference in first position.
    my $exclude = ref $_[0] ? shift : [];
    my %skip;
    @skip{@$exclude} = ();

    my $depth = @_ ? $_[0] : -1;

    my $show_tag    = ($depth and not exists $skip{ $self->{TAG} });
    # A positive depth counts down; 0 and negative values pass through as-is.
    my $child_depth = ($depth and $depth > 0) ? $depth - 1 : $depth;

    my $str = $show_tag ? $self->string : '';
    $str .= join '',
        map { $_->fullstring($exclude, $child_depth) } @{ $self->{TEXT} };
    $str .= "</$self->{TAG}>" if $self->{CLOSED} and $show_tag;
    return $str;
}

# Attribute accessors; see the _get_attr/_has_attr/_set_attr/_rem_attr
# helpers in YAPE::HTML::Element for the exact semantics.
sub get_attr {
    my $self = shift;
    return YAPE::HTML::Element::_get_attr($self->{ATTR}, @_);
}

sub has_attr {
    my $self = shift;
    return YAPE::HTML::Element::_has_attr($self->{ATTR}, @_);
}

sub set_attr {
    my $self = shift;
    return YAPE::HTML::Element::_set_attr($self->{ATTR}, @_);
}

sub rem_attr {
    my $self = shift;
    return YAPE::HTML::Element::_rem_attr($self->{ATTR}, @_);
}

# Returns the CLOSED flag given to the constructor.
sub closed { return $_[0]{CLOSED} }

# Returns the IMPLIED flag given to the constructor.
sub implied_closed { return $_[0]{IMPLIED} }

# Returns the element name.
sub tag { return $_[0]{TAG} }


package YAPE::HTML::closetag;

# Constructor.  $tag is the element name.
sub new {
    my ($class, $tag) = @_;
    return bless { TYPE => 'closetag', TAG => $tag }, $class;
}

# Renders the closing tag, e.g. '</a>'.
sub string { return "</$_[0]{TAG}>" }

# Returns the element name.
sub tag { return $_[0]{TAG} }


package YAPE::HTML::text;

# Constructor.  $text is the content of the text block.
sub new {
    my ($class, $text) = @_;
    return bless { TYPE => 'text', TEXT => $text }, $class;
}


package YAPE::HTML::comment;

# Constructor.  $comment is the comment body, without the delimiters.
sub new {
    my ($class, $comment) = @_;
    return bless { TYPE => 'comment', TEXT => $comment }, $class;
}

# Renders the comment wrapped in '<!--' and '-->'.
sub string { return "<!--$_[0]{TEXT}-->" }


package YAPE::HTML::dtd;

# Constructor.  $attr is an array reference of the DOCTYPE fields (defaults
# to []).  The array is stored as given (not copied) in ATTR; HATTR holds
# the same fields as hash keys.
sub new {
    my ($class, $attr) = @_;
    $attr ||= [];
    my %field_set;
    @field_set{@$attr} = ();
    return bless {
        TYPE  => 'dtd',
        ATTR  => $attr,
        HATTR => \%field_set,
    }, $class;
}

# Returns the list of DOCTYPE fields.
sub get_attr { return @{ $_[0]{ATTR} } }

# Replaces all DOCTYPE fields at once.  Both ATTR and HATTR are updated in
# place; HATTR is emptied first so that fields from the previous value do
# not linger as stale keys.
sub set_attr {
    my ($self, @fields) = @_;
    @{ $self->{ATTR} } = @fields;
    %{ $self->{HATTR} } = ();
    @{ $self->{HATTR} }{@fields} = ();
    return;
}

# Renders '<!DOCTYPE ...>' with the fields separated by spaces.
sub string { return "<!DOCTYPE @{ $_[0]{ATTR} }>" }


package YAPE::HTML::pi;

# Constructor.
#   $name - name of the processing instruction
#   $attr - hash reference of attributes (defaults to {})
sub new {
    my ($class, $name, $attr) = @_;
    return bless { TYPE => 'pi', NAME => $name, ATTR => $attr || {} }, $class;
}

# Renders '<?name attr="value" ...?>' with attributes in sort() order.
sub string {
    my $self = shift;
    return "<?$self->{NAME}"
         . YAPE::HTML::Element::_attr_string($self->{ATTR})
         . "?>";
}

# Attribute accessors; same semantics as on YAPE::HTML::tag.
sub get_attr {
    my $self = shift;
    return YAPE::HTML::Element::_get_attr($self->{ATTR}, @_);
}

sub has_attr {
    my $self = shift;
    return YAPE::HTML::Element::_has_attr($self->{ATTR}, @_);
}

sub set_attr {
    my $self = shift;
    return YAPE::HTML::Element::_set_attr($self->{ATTR}, @_);
}

sub rem_attr {
    my $self = shift;
    return YAPE::HTML::Element::_rem_attr($self->{ATTR}, @_);
}

# Returns the name of the processing instruction.
sub name { return $_[0]{NAME} }


package YAPE::HTML::ssi;

# Constructor.
#   $com  - the SSI command
#   $attr - hash reference of attributes (defaults to {})
sub new {
    my ($class, $com, $attr) = @_;
    return bless { TYPE => 'ssi', COM => $com, ATTR => $attr || {} }, $class;
}

# Renders '<!--#command attr="value" ...-->' with attributes in sort() order.
sub string {
    my $self = shift;
    return "<!--#$self->{COM}"
         . YAPE::HTML::Element::_attr_string($self->{ATTR})
         . "-->";
}

# Attribute accessors; same semantics as on YAPE::HTML::tag.
sub get_attr {
    my $self = shift;
    return YAPE::HTML::Element::_get_attr($self->{ATTR}, @_);
}

sub has_attr {
    my $self = shift;
    return YAPE::HTML::Element::_has_attr($self->{ATTR}, @_);
}

sub set_attr {
    my $self = shift;
    return YAPE::HTML::Element::_set_attr($self->{ATTR}, @_);
}

sub rem_attr {
    my $self = shift;
    return YAPE::HTML::Element::_rem_attr($self->{ATTR}, @_);
}

# Returns the SSI command name.
sub command { return $_[0]{COM} }

1;

__END__
=cut