XML::SAX::ExpatXS - Perl SAX 2 XS extension to Expat parser


XML-SAX-ExpatXS documentation Contained in the XML-SAX-ExpatXS distribution.

Index


Code Index:

NAME

Top

XML::SAX::ExpatXS - Perl SAX 2 XS extension to Expat parser

SYNOPSIS

Top

 use XML::SAX::ExpatXS;

 $handler = MyHandler->new();
 $parser = XML::SAX::ExpatXS->new( Handler => $handler );
 $parser->parse_uri($uri);
  #or
 $parser->parse_string($xml);

DESCRIPTION

Top

XML::SAX::ExpatXS is a direct XS extension to Expat XML parser. It implements Perl SAX 2.1 interface. See http://perl-xml.sourceforge.net/perl-sax/ for Perl SAX API description. Any deviations from the Perl SAX 2.1 specification are considered as bugs.

Features

The parser behavior can be changed by setting features.

 $parser->set_feature(FEATURE, VALUE);

XML::SAX::ExpatXS provides these adjustable features:

http://xmlns.perl.org/sax/join-character-data

Consequent character data are joined (1, default) or not (0).

http://xmlns.perl.org/sax/ns-attributes

Namespace attributes are reported as common attributes (1, default) or not (0).

http://xmlns.perl.org/sax/xmlns-uris

When set on, xmlns and xmlns:* attributes are put into namespaces in a Perl SAX traditional way; xmlns attributes are in no namespace while xmlns:* attributes are in the http://www.w3.org/2000/xmlns/ namespace. This feature is set to 1 by default.

http://xml.org/sax/features/xmlns-uris

This feature applies if and only if the http://xmlns.perl.org/sax/xmlns-uris feature is off. Then, xmlns and xmlns:* attributes are both put into no namespace (0, default) or into http://www.w3.org/2000/xmlns/ namespace (1).

http://xmlns.perl.org/sax/locator

The document locator is updated (1, default) for ContentHadler events or not (0).

http://xmlns.perl.org/sax/recstring

A recognized string (the text string currently processed by this XML parser) is either maintained as $parser->{ParseOptions}{RecognizedString} (1) or not (0, default).

http://xml.org/sax/features/external-general-entities

Controls whether this parser processes external general entities (1, default) or not (0).

http://xml.org/sax/features/external-parameter-entities

Controls whether this parser processes external parameter entities including an external DTD subset (1) or not (0, default).

Constructor Options

Apart from features, the behavior of this parser can also be changed with options to the constructor.

ParseParamEnt
 ParseParamEnt => 1

This option meaning is exactly the same as the http://xml.org/sax/features/external-parameter-entities feature. The option is supported only because of the compatibility with older versions of this module. Turned off by default.

NoExpand
 NoExpand => 1

No internal entities are expanded if this option is turned on. Turned off by default.

Read-only Properties

ExpatVersion

This property returns a version of linked Expat library, for example expat_1.95.7.

AUTHORS

Top

 Petr Cimprich <petr AT gingerall DOT org> (maintainer)
 Matt Sergeant <matt AT sergeant DOT org>


XML-SAX-ExpatXS documentation Contained in the XML-SAX-ExpatXS distribution.

package XML::SAX::ExpatXS;
use strict;
use vars qw($VERSION @ISA);

use XML::SAX::ExpatXS::Encoding;
use XML::SAX::ExpatXS::Preload;
use XML::SAX::Base;
use DynaLoader ();
use Carp;
use IO::File;

$VERSION = '1.32';
@ISA = qw(DynaLoader XML::SAX::Base XML::SAX::ExpatXS::Preload);

XML::SAX::ExpatXS->bootstrap($VERSION);

my @features = (
	['http://xml.org/sax/features/namespaces', 1],
	['http://xml.org/sax/features/external-general-entities', 1],
	['http://xml.org/sax/features/external-parameter-entities', 0],
    ['http://xml.org/sax/features/xmlns-uris', 0],
    ['http://xmlns.perl.org/sax/xmlns-uris', 1],
    ['http://xmlns.perl.org/sax/version-2.1', 1],
	['http://xmlns.perl.org/sax/join-character-data', 1],
	['http://xmlns.perl.org/sax/ns-attributes', 1],
	['http://xmlns.perl.org/sax/locator', 1],
	['http://xmlns.perl.org/sax/recstring', 0]
			 );
my @supported_features = map($_->[0], @features);


#------------------------------------------------------------
# API methods
#------------------------------------------------------------

sub new {
    my $proto = shift;
    my $options = ($#_ == 0) ? shift : { @_ };

    foreach (@features) {
	$options->{Features}->{$_->[0]} = $_->[1];
    }

    $options->{ExpatVersion} = ExpatVersion();

    return $proto->SUPER::new($options);
}

sub get_feature {
    my ($self, $feat) = @_;
      if (exists $self->{Features}->{$feat}) {
	  return $self->{Features}->{$feat};
      }
      else {
          return $self->SUPER::get_feature($feat);
      }
  }

sub set_feature {
    my ($self, $feat, $val) = @_;
      if (exists $self->{Features}->{$feat}) {
	  return $self->{Features}->{$feat} = $val;
      }
      else {
          return $self->SUPER::set_feature($feat, $val);
      }
  }

sub get_features {
    my $self = shift;
    return %{$self->{Features}};
}

sub supported_features {
    my $self = shift;

    return @supported_features;
}

#------------------------------------------------------------
# internal methods
#------------------------------------------------------------

sub _parse_characterstream {
    my ($self, $fh) = @_;
    $self->{ParseOptions}->{ParseFunc} = \&ParseStream;
    $self->{ParseOptions}->{ParseFuncParam} = $fh;
    $self->_parse;
}

sub _parse_bytestream {
    my ($self, $fh) = @_;
    $self->{ParseOptions}->{ParseFunc} = \&ParseStream;
    $self->{ParseOptions}->{ParseFuncParam} = $fh;
    $self->_parse;
}

sub _parse_string {
    my ($self, $str) = @_;
    $self->{ParseOptions}->{ParseFunc} = \&ParseString;
    $self->{ParseOptions}->{ParseFuncParam} = $str;
    $self->_parse;
}

sub _parse_systemid {
    my ($self, $uri) = @_;
    my $fh = IO::File->new($uri) or croak "ExpatXS: Can't open $uri ($!)";
    $self->{ParseOptions}->{ParseFunc} = \&ParseStream;
    $self->{ParseOptions}->{ParseFuncParam} = $fh;
    $self->_parse;
}

sub _parse {
    my $self = shift;

    my $args = bless $self->{ParseOptions}, ref($self);
    delete $args->{ParseOptions};

    # copy handlers over
    $args->{Handler} = $self->{Handler};
    $args->{DocumentHandler} = $self->{DocumentHandler};
    $args->{ContentHandler} = $self->{ContentHandler};
    $args->{DTDHandler} = $self->{DTDHandler};
    $args->{LexicalHandler} = $self->{LexicalHandler};
    $args->{DeclHandler} = $self->{DeclHandler};
    $args->{ErrorHandler} = $self->{ErrorHandler};
    $args->{EntityResolver} = $self->{EntityResolver};

    $args->{_State_} = 0;
    $args->{Context} = [];
    $args->{ErrorMessage} ||= '';
    $args->{Namespace_Stack} = [[ xml => 'http://www.w3.org/XML/1998/namespace' ]];
    $args->{Parser} = ParserCreate($args, 
				   $args->{Source}{Encoding} 
				   || $args->{ProtocolEncoding}, 
				   1);
    $args->{Locator} = GetLocator($args->{Parser}, 
				  $args->{Source}{PublicId} || '',
				  $args->{Source}{SystemId} || '',
				  $args->{Source}{Encoding} || '',
				 );
    $args->{RecognizedString} = GetRecognizedString($args->{Parser});
    $args->{ExternEnt} = GetExternEnt($args->{Parser});

    $args->{Methods} = {};
    $args->get_start_element();
    $args->get_end_element();
    $args->get_characters();
    $args->get_comment();

    # the most common handlers are available as refs
    SetCallbacks($args->{Parser}, 
		 $args->{Methods}->{start_element},
		 $args->{Methods}->{end_element},
		 $args->{Methods}->{characters},
		 $args->{Methods}->{comment},
  		);

    $args->set_document_locator($args->{Locator});
    $args->start_document({});
   
    my $result;
    $result = $args->{ParseFunc}->($args->{Parser}, $args->{ParseFuncParam});

    ParserFree($args->{Parser});

    my $rv = $args->end_document({});   # end_document is still called on error

    croak($args->{ErrorMessage}) unless $result;
    return $rv;
}

sub _get_external_entity {
    my ($self, $base, $sysid, $pubid) = @_;

    # resolving with the base URI
    if ($base and $sysid and $sysid !~ /^[a-zA-Z]+[a-zA-Z\d\+\-\.]*:/) {
	$base =~ s/[^\/]+$//;
	$sysid = $base . $sysid;
    }

    # user defined resolution
    my $src = $self->resolve_entity({PublicId => $pubid, 
				     SystemId => $sysid});
    my $fh;
    my $result;
    my $string;
    if (ref($src) eq 'CODE') {
	$fh = IO::File->new($sysid)
	  or croak("Can't open external entity: $sysid\n");

    } elsif (ref($src) eq 'HASH') {
	if (defined $src->{CharacterStream}) {
	    $fh = $src->{CharacterStream};

	} elsif (defined $src->{ByteStream}) {
	    $fh = $src->{ByteStream};

	} elsif (defined $src->{String}) {
	    $result = $src->{String};
	    $string = 1;

	} else {
	    $fh = IO::File->new($src->{SystemId})
	      or croak("Can't open external entity: $src->{SystemId}\n");
	}

    } else {
	croak ("Invalid object returned by EntityResolver: $src\n");
    }

    unless ($string) {
	local $/;
	undef $/;
	$result = <$fh>;
	close($fh);
    }
    return $result;
}

sub _get_handler_methods {
    my $self = shift;


}

1;
__END__