XML::SAX::Expat - SAX2 Driver for Expat (XML::Parser)


XML-SAX-Expat documentation Contained in the XML-SAX-Expat distribution.

Index


Code Index:

NAME

Top

XML::SAX::Expat - SAX2 Driver for Expat (XML::Parser)

SYNOPSIS

Top

  use XML::SAX::Expat;
  use XML::SAX::MyFooHandler;
  my $h = XML::SAX::MyFooHandler->new;
  my $p = XML::SAX::Expat->new(Handler => $h);
  $p->parse_file('/path/to/foo.xml');

DESCRIPTION

Top

This is an implementation of a SAX2 driver sitting on top of Expat (XML::Parser) which Ken MacLeod posted to perl-xml and which I have updated.

It is still incomplete, though most of the basic SAX2 events should be available. The SAX2 spec is currently available from http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/~checkout~/perl-xml/libxml-perl/doc/sax-2.0.html?rev=HEAD&content-type=text/html

A more friendly URL as well as a PODification of the spec are in the works.

METHODS

Top

The methods defined in this class correspond to those listed in the PerlSAX2 specification, available above.

FEATURES AND CAVEATS

Top

supported_features

Returns:

  * http://xml.org/sax/features/external-general-entities
  * http://xml.org/sax/features/external-parameter-entities
  * [ Features supported by ancestors ]

Turning one of the first two on also turns the other on (this maps to the XML::Parser ParseParamEnts option). This may be fixed in the future, so don't rely on this behaviour.

MISSING PARTS

Top

XML::Parser has no listed callbacks for the following events, which are therefore not presently generated (ways may be found in the future):

  * ignorable_whitespace
  * skipped_entity
  * start_entity / end_entity
  * resolve_entity

Ways of signalling them are welcome. In addition to those, set_document_locator is not yet called.

TODO

Top

  - reuse Ken's tests and add more

AUTHOR

Top

Robin Berjon; stolen from Ken Macleod, ken@bitsko.slc.ut.us, and with suggestions and feedback from perl-xml. Currently maintained by Bjoern Hoehrmann http://bjoern.hoehrmann.de/.

COPYRIGHT AND LICENSE

Top

SEE ALSO

Top

XML::Parser::PerlSAX


XML-SAX-Expat documentation Contained in the XML-SAX-Expat distribution.

###
# XML::SAX::Expat - SAX2 Driver for Expat (XML::Parser)
# Originally by Robin Berjon
###

package XML::SAX::Expat;
use strict;
use base qw(XML::SAX::Base);
use XML::NamespaceSupport   qw();
use XML::Parser             qw();

use vars qw($VERSION);
$VERSION = '0.40';


#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,#
#`,`, Variations on parse `,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,#
#```````````````````````````````````````````````````````````````````#

#-------------------------------------------------------------------#
# CharacterStream
#-------------------------------------------------------------------#
sub _parse_characterstream {
    my $p       = shift;
    my $xml     = shift;
    my $opt     = shift;

    my $expat = $p->_create_parser($opt);
    my $result = $expat->parse($xml);
    $p->_cleanup;
    return $result;
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# ByteStream
#-------------------------------------------------------------------#
sub _parse_bytestream {
    my $p       = shift;
    my $xml     = shift;
    my $opt     = shift;

    my $expat = $p->_create_parser($opt);
    my $result = $expat->parse($xml);
    $p->_cleanup;
    return $result;
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# String
#-------------------------------------------------------------------#
sub _parse_string {
    my $p       = shift;
    my $xml     = shift;
    my $opt     = shift;

    my $expat = $p->_create_parser($opt);
    my $result = $expat->parse($xml);
    $p->_cleanup;
    return $result;
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# SystemId
#-------------------------------------------------------------------#
sub _parse_systemid {
    my $p       = shift;
    my $xml     = shift;
    my $opt     = shift;

    my $expat = $p->_create_parser($opt);
    my $result = $expat->parsefile($xml);
    $p->_cleanup;
    return $result;
}
#-------------------------------------------------------------------#


#-------------------------------------------------------------------#
# $p->_create_parser(\%options)
#-------------------------------------------------------------------#
sub _create_parser {
    my $self = shift;
    my $opt  = shift;

    die "ParserReference: parser instance ($self) already parsing\n"
         if $self->{_InParse};

    my $featUri = 'http://xml.org/sax/features/';
    my $ppe = ($self->get_feature($featUri . 'external-general-entities') or
               $self->get_feature($featUri . 'external-parameter-entities') ) ? 1 : 0;

    my $expat = XML::Parser->new( ParseParamEnt => $ppe );
    $expat->{__XSE} = $self;
    $expat->setHandlers(
                        Init        => \&_handle_init,
                        Final       => \&_handle_final,
                        Start       => \&_handle_start,
                        End         => \&_handle_end,
                        Char        => \&_handle_char,
                        Comment     => \&_handle_comment,
                        Proc        => \&_handle_proc,
                        CdataStart  => \&_handle_start_cdata,
                        CdataEnd    => \&_handle_end_cdata,
                        Unparsed    => \&_handle_unparsed_entity,
                        Notation    => \&_handle_notation_decl,
                        #ExternEnt
                        #ExternEntFin
                        Entity      => \&_handle_entity_decl,
                        Element     => \&_handle_element_decl,
                        Attlist     => \&_handle_attr_decl,
                        Doctype     => \&_handle_start_doctype,
                        DoctypeFin  => \&_handle_end_doctype,
                        XMLDecl     => \&_handle_xml_decl,
                      );

    $self->{_InParse} = 1;
    $self->{_NodeStack} = [];
    $self->{_NSStack} = [];
    $self->{_NSHelper} = XML::NamespaceSupport->new({xmlns => 1});
    $self->{_started} = 0;

    return $expat;
}
#-------------------------------------------------------------------#


#-------------------------------------------------------------------#
# $p->_cleanup
#-------------------------------------------------------------------#
sub _cleanup {
    my $self = shift;

    $self->{_InParse} = 0;
    delete $self->{_NodeStack};
}
#-------------------------------------------------------------------#



#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,#
#`,`, Expat Handlers ,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,#
#```````````````````````````````````````````````````````````````````#

#-------------------------------------------------------------------#
# _handle_init
#-------------------------------------------------------------------#
sub _handle_init {
    #my $self    = shift()->{__XSE};

    #my $document = {};
    #push @{$self->{_NodeStack}}, $document;
    #$self->SUPER::start_document($document);
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_final
#-------------------------------------------------------------------#
sub _handle_final {
    my $self    = shift()->{__XSE};

    #my $document = pop @{$self->{_NodeStack}};
    return $self->SUPER::end_document({});
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_start
#-------------------------------------------------------------------#
sub _handle_start {
    my $self    = shift()->{__XSE};
    my $e_name  = shift;
    my %attr    = @_;

    # start_document data
    $self->_handle_start_document({}) unless $self->{_started};

    # take care of namespaces
    my $nsh = $self->{_NSHelper};
    $nsh->push_context;
    my @new_ns;
    for my $k (grep !index($_, 'xmlns'), keys %attr) {
        $k =~ m/^xmlns(:(.*))?$/;
        my $prefix = $2 || '';
        $nsh->declare_prefix($prefix, $attr{$k});
        my $ns = {
                    Prefix       => $prefix,
                    NamespaceURI => $attr{$k},
                 };
        push @new_ns, $ns;
        $self->SUPER::start_prefix_mapping($ns);
    }
    push @{$self->{_NSStack}}, \@new_ns;


    # create the attributes
    my %saxattr;
    map {
        my ($ns,$prefix,$lname) = $nsh->process_attribute_name($_);
        $saxattr{'{' . ($ns || '') . '}' . $lname} = {
                                    Name         => $_,
                                    LocalName    => $lname || '',
                                    Prefix       => $prefix || '',
                                    Value        => $attr{$_},
                                    NamespaceURI => $ns || '',
                                 };
    } keys %attr;


    # now the element
    my ($ns,$prefix,$lname) = $nsh->process_element_name($e_name);
    my $element = {
                    Name         => $e_name,
                    LocalName    => $lname || '',
                    Prefix       => $prefix || '',
                    NamespaceURI => $ns || '',
                    Attributes   => \%saxattr,
                   };

    push @{$self->{_NodeStack}}, $element;
    $self->SUPER::start_element($element);
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_end
#-------------------------------------------------------------------#
sub _handle_end {
    my $self    = shift()->{__XSE};

    my %element = %{pop @{$self->{_NodeStack}}};
    delete $element{Attributes};
    $self->SUPER::end_element(\%element);

    my $prev_ns = pop @{$self->{_NSStack}};
    for my $ns (@$prev_ns) {
        $self->SUPER::end_prefix_mapping( { %$ns } );
    }
    $self->{_NSHelper}->pop_context;
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_char
#-------------------------------------------------------------------#
sub _handle_char {
    $_[0]->{__XSE}->_handle_start_document({}) unless $_[0]->{__XSE}->{_started};
    $_[0]->{__XSE}->SUPER::characters({ Data => $_[1] });
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_comment
#-------------------------------------------------------------------#
sub _handle_comment {
    $_[0]->{__XSE}->_handle_start_document({}) unless $_[0]->{__XSE}->{_started};
    $_[0]->{__XSE}->SUPER::comment({ Data => $_[1] });
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_proc
#-------------------------------------------------------------------#
sub _handle_proc {
    $_[0]->{__XSE}->_handle_start_document({}) unless $_[0]->{__XSE}->{_started};
    $_[0]->{__XSE}->SUPER::processing_instruction({ Target => $_[1], Data => $_[2] });
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_start_cdata
#-------------------------------------------------------------------#
sub _handle_start_cdata {
    $_[0]->{__XSE}->SUPER::start_cdata( {} );
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_end_cdata
#-------------------------------------------------------------------#
sub _handle_end_cdata {
    $_[0]->{__XSE}->SUPER::end_cdata( {} );
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_xml_decl
#-------------------------------------------------------------------#
sub _handle_xml_decl {
    my $self    = shift()->{__XSE};
    my $version     = shift;
    my $encoding    = shift;
    my $standalone  = shift;

    if (not defined $standalone) { $standalone = '';    }
    elsif ($standalone)          { $standalone = 'yes'; }
    else                         { $standalone = 'no';  }
    my $xd = {
                Version     => $version,
                Encoding    => $encoding,
                Standalone  => $standalone,
             };
    #$self->SUPER::xml_decl($xd);
    $self->_handle_start_document($xd);
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_notation_decl
#-------------------------------------------------------------------#
sub _handle_notation_decl {
    my $self    = shift()->{__XSE};
    my $notation    = shift;
    shift;
    my $system      = shift;
    my $public      = shift;

    my $not = {
                Name        => $notation,
                PublicId    => $public,
                SystemId    => $system,
              };
    $self->SUPER::notation_decl($not);
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_unparsed_entity
#-------------------------------------------------------------------#
sub _handle_unparsed_entity {
    my $self    = shift()->{__XSE};
    my $name        = shift;
    my $system      = shift;
    my $public      = shift;
    my $notation    = shift;

    my $ue = {
                Name        => $name,
                PublicId    => $public,
                SystemId    => $system,
                Notation    => $notation,
             };
    $self->SUPER::unparsed_entity_decl($ue);
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_element_decl
#-------------------------------------------------------------------#
sub _handle_element_decl {
    $_[0]->{__XSE}->SUPER::element_decl({ Name => $_[1], Model => "$_[2]" });
}
#-------------------------------------------------------------------#


#-------------------------------------------------------------------#
# _handle_attr_decl
#-------------------------------------------------------------------#
sub _handle_attr_decl {
    my $self    = shift()->{__XSE};
    my $ename   = shift;
    my $aname   = shift;
    my $type    = shift;
    my $default = shift;
    my $fixed   = shift;

    my ($vd, $value);
    if ($fixed) {
        $vd = '#FIXED';
        $default =~ s/^(?:"|')//; #"
        $default =~ s/(?:"|')$//; #"
        $value = $default;
    }
    else {
        if ($default =~ m/^#/) {
            $vd = $default;
            $value = '';
        }
        else {
            $vd = ''; # maybe there's a default ?
            $default =~ s/^(?:"|')//; #"
            $default =~ s/(?:"|')$//; #"
            $value = $default;
        }
    }

    my $at = {
                eName           => $ename,
                aName           => $aname,
                Type            => $type,
                ValueDefault    => $vd,
                Value           => $value,
             };
    $self->SUPER::attribute_decl($at);
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_entity_decl
#-------------------------------------------------------------------#
sub _handle_entity_decl {
    my $self    = shift()->{__XSE};
    my $name    = shift;
    my $val     = shift;
    my $sys     = shift;
    my $pub     = shift;
    my $ndata   = shift;
    my $isprm   = shift;

    # deal with param ents
    if ($isprm) {
        $name = '%' . $name;
    }

    # int vs ext
    if ($val) {
        my $ent = {
                    Name    => $name,
                    Value   => $val,
                  };
        $self->SUPER::internal_entity_decl($ent);
    }
    else {
        my $ent = {
                    Name        => $name,
                    PublicId    => $pub || '',
                    SystemId    => $sys,
                  };
        $self->SUPER::external_entity_decl($ent);
    }
}
#-------------------------------------------------------------------#


#-------------------------------------------------------------------#
# _handle_start_doctype
#-------------------------------------------------------------------#
sub _handle_start_doctype {
    my $self    = shift()->{__XSE};
    my $name    = shift;
    my $sys     = shift;
    my $pub     = shift;

    $self->_handle_start_document({}) unless $self->{_started};

    my $dtd = {
                Name        => $name,
                SystemId    => $sys,
                PublicId    => $pub,
              };
    $self->SUPER::start_dtd($dtd);
}
#-------------------------------------------------------------------#

#-------------------------------------------------------------------#
# _handle_end_doctype
#-------------------------------------------------------------------#
sub _handle_end_doctype {
    $_[0]->{__XSE}->SUPER::end_dtd( {} );
}
#-------------------------------------------------------------------#


#-------------------------------------------------------------------#
# _handle_start_document
#-------------------------------------------------------------------#
sub _handle_start_document {
    $_[0]->SUPER::start_document($_[1]);
    $_[0]->{_started} = 1;
}
#-------------------------------------------------------------------#


#-------------------------------------------------------------------#
# supported_features
#-------------------------------------------------------------------#
sub supported_features {
    return (
             $_[0]->SUPER::supported_features,
             'http://xml.org/sax/features/external-general-entities',
             'http://xml.org/sax/features/external-parameter-entities',
           );
}
#-------------------------------------------------------------------#





#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,#
#`,`, Private Helpers `,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,#
#```````````````````````````````````````````````````````````````````#

#-------------------------------------------------------------------#
# _create_node
#-------------------------------------------------------------------#
#sub _create_node {
#    shift;
#    # this may check for a factory later
#    return {@_};
#}
#-------------------------------------------------------------------#


1;
#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,#
#`,`, Documentation `,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,#
#```````````````````````````````````````````````````````````````````#