| XML-DOM2 documentation | Contained in the XML-DOM2 distribution. |
XML::DOM2::Parser - Sax based xml parser for XML::DOM2
This parser was constructed using XML::SAX::PurePerl which Was known at the time to lack a number of calls which where important for dealing with things like document type and text formating and xml decls. hopfully in the future this will be fixed and this method will be able to take advantage of those part of an xml page.
Create a new parser object.
Return the document object
Called at the start of a document.
Called at the end of a document.
Start a new xml element
Ends an xml element
Handle part of a cdata by concatination
Handle combined text strings as cdata
WARNING: Comments are currently removed!
Never used by parser.
Never used by parser.
Never used by parser.
We want to store the below details for the document creation
Decode the xml decleration information.
Martin Owens, doctormo@cpan.org
| XML-DOM2 documentation | Contained in the XML-DOM2 distribution. |
package XML::DOM2::Parser;
use strict; use base qw(XML::SAX::Base); use Carp;
sub new { my ($proto, %opts) = @_; $opts{'inline'} = 1; if(not $opts{'document'}) { croak "Unable to parse xml without document"; } return bless \%opts, $proto; }
sub document { my ($self) = @_; return $self->{'document'}; }
sub start_document { my ($self, $doc) = @_; $self->{'inline'} = 0; }
sub end_document { my ($self) = @_; }
sub start_element { my ($self, $node) = @_; $self->text; # ELEMENT # LocalName - The name of the element minus any namespace prefix it may have come with in the document. # NamespaceURI - The URI of the namespace associated with this element, or the empty string for none. # Attributes - A set of attributes as described below. # Name - The name of the element as it was seen in the document (i.e. including any prefix associated with it) # Prefix - The prefix used to qualify this elementâs namespace, or the empty string if none. my $element; my $parent = $self->{'parent'}; if(not $parent and not $self->{'inline'}) { $self->document->doctype->name($node->{'LocalName'}); } if( $node->{'LocalName'} ) { if($parent) { # Name spaces my $ns = $self->document->getNamespace( $node->{'Prefix'} ) if $node->{'Prefix'}; warn "Could not get namespace for node: ".$node->{'Prefix'}."\n" if $node->{'Prefix'} && not defined($ns); $element = $parent->createChildElement($node->{'LocalName'}, document => $self->document, namespace => $ns, ); } else { # This would be a root element (document) $self->{'parents'} = []; $element = $self->document->createElement( $node->{'LocalName'}, document => $self->document ); $self->document->documentElement($element); # Name spaces, we do this first so later on we don't try adding attributes # into the document element that have namespaces yet to be added in the hash # order (perl!) my $ns = $self->document->getNamespace( 'xmlns' ); foreach my $a (keys(%{$node->{'Attributes'}})) { my $attribute = $node->{'Attributes'}->{$a}; if($attribute->{'Name'} eq 'xmlns') { # warn "Namespace ".$attribute->{'Prefix'}.':'.$attribute->{'Name'}.'='.$attribute->{'Value'}." to ".$node->{'Name'}."\n"; $element->setAttribute( $attribute->{'LocalName'}, $attribute->{'Value'} ); } elsif($attribute->{'Prefix'} eq 'xmlns') { # warn "NSW ".$attribute->{'Prefix'}.':'.$attribute->{'Name'}.'='.$attribute->{'Value'}." to ".$node->{'Name'}."\n"; $self->document->createNamespace($attribute->{'LocalName'}, $attribute->{'Value'}); } else { next; } delete($node->{'Attributes'}->{$a}); } } } # ATTRIBUTES {} # LocalName - The name of the attribute minus any namespace prefix it may have come with in the document. # NamespaceURI - The URI of the namespace associated with this attribute. If the attribute had no prefix, then this consists of just the empty string. # Name - The attributeâs name as it appeared in the document, including any namespace prefix. # Prefix - The prefix used to qualify this attributeâs namepace, or the empty string if none. # Value - VALUE. foreach my $attribute (values(%{$node->{'Attributes'}})) { if($attribute->{'Prefix'}) { my $ns = $self->document->getNamespace( $attribute->{'Prefix'} ); if(not $ns) { warn "Could not get namespace for attribute: ".$attribute->{'Prefix'}." (".$attribute->{'NamespaceURI'}.")\n"; next; } $element->setAttributeNS( $ns, $attribute->{'LocalName'}, $attribute->{'Value'} ); } else { $element->setAttribute( $attribute->{'LocalName'}, $attribute->{'Value'} ); } } push(@{$self->{'parents'}}, $self->{'parent'})if $self->{'parent'}; $self->{'parent'} = $element; }
sub end_element { my ($self, $element) = @_; $self->text; # ELEMENT # LocalName - The name of the element minus any namespace prefix it may have come with in the document. # NamespaceURI - The URI of the namespace associated with this element, or the empty string for none. # Name - The name of the element as it was seen in the document (i.e. including any prefix associated with it) # Prefix - The prefix used to qualify this elementâs namespace, or the empty string if none. $self->{'parent'} = pop @{$self->{'parents'}}; }
sub characters { my ($self, $text) = @_; $text = $text->() if ref($text) eq 'CODE'; # We wish to keep track of text characters, and # and deal with text once other elements are found $self->{'text'} = '' if not defined($self->{'-text'}); $self->{'text'} .= $text->{'Data'}; }
sub text { my ($self) = @_; if($self->{'text'}) { my $text = $self->{'text'}; if($text =~ /\S/) { $self->{'parent'}->cdata($text); } delete($self->{'text'}); } }
sub comment { my ($self, $comment) = @_; $self->text; # warn "Comment '".$comment->{'Data'}."'\n"; # Data }
sub start_cdata { print STDERR "START CDATA\n"; }
sub end_cdata { print STDERR "END CDATA\n"; }
sub processing_instruction { print STDERR "PI\n"; }
sub doctype_decl { my ($self, $dtd) = @_; my $doc = $self->document; # Name # SystemId # PublicId warn "Setting doctype name to ".$dtd->{'Name'}."\n"; $doc->doctype->name($dtd->{'Name'}); $doc->doctype->systemId($dtd->{'SystemId'}); $doc->doctype->publicId($dtd->{'PublicId'}); # $self->{'dtd'} = $dtd; }
sub xml_decl { my ($self, $xml) = @_; my $doc = $self->document; # Version # Encoding # Standalone $doc->version($xml->{'Version'}); $doc->encoding($xml->{'Encoding'}); $doc->standalone($xml->{'Standalone'}); # $self->{'xml'} = $xml; }
1;