Data::Validate::XSD::ParseXML - Parse an XML file into a data structure for validation


Data-Validate-XSD documentation Contained in the Data-Validate-XSD distribution.

Index


Code Index:

NAME

Top

Data::Validate::XSD::ParseXML - Parse an XML file into a data structure for validation

DESCRIPTION

Top

  Please install XML::SAX to use this module.

  Used internally by Data::Validate::XSD to load xml files for both xsd definitions
  and xml data. For the xml data we use a simple conversion metric which treats each
  tag level as an hash reference and multiple tags witht he same name as an array reference.

  For the xsd defininitions we use the same method as the data to aquire the data but then
  It's converted into a simpler format and any features which arn't available will produce
  warnings.

$parser->new( $xml_string )

  Create a new parser object to parse xml files.

$parser->data( )

  Return the parsed data structure.

$parser->definition( )

  Convert the data into a definition, assume it's in xsd format.

$parser->new( )

  Create a new parser object.

SAX PARSING

Top

$parser->start_element( $node )

  Start a new xml element

$parser->end_element( $element )

  Ends an xml element

$parser->characters()

  Handle part of a cdata by concatination

COPYRIGHT

Top

SEE ALSO

Top

Data::Validate::XSD,XML::SAX


Data-Validate-XSD documentation Contained in the Data-Validate-XSD distribution.
package Data::Validate::XSD::ParseXML;

use strict;

use XML::SAX::ParserFactory;

sub new {
	my ($class, $xml) = @_;
	return bless { xml => $xml }, $class;
}

sub data {
	my ($self) = @_;

	my $handler = Data::Validate::XSD::ParseXML::Parser->new();
	my $parser = XML::SAX::ParserFactory->parser( Handler => $handler );
	$parser->parse_string( $self->{'xml'} );

	return $handler->{'root'};
}

sub definition {
	my ($self) = @_;
	my $data   = $self->data();
	my $result = {};

	if($data) {
		$data = $data->{'schema'};
		$self->_decode_complexes( $data->{'complexType'}, $result );
		$self->_decode_simples( $data->{'simpleType'}, $result );

		$result->{'root'} = $self->_decode_elements($data->{'element'}, $result);
	}

	return $result;
}

sub _decode_complexes {
	my ($self, $data, $result) = @_;

	$data = [ $data ] if ref($data) ne 'ARRAY';

	foreach my $d (@{$data}) {
		my $name = $d->{'_name'};
		$self->_decode_complex( $name, $d, $result );
	}
}

sub _decode_complex {
	my ($self, $name, $data, $result) = @_;

	my $elements;
	if($data->{'element'}) {
		$elements = $self->_decode_elements( $data->{'element'}, $result );
	}

	if($data->{'or'}) {
		
	}

	$result->{'complexTypes'}->{$name} = $elements;

}

sub _decode_elements {
	my ($self, $data, $result) = @_;

	$data = [ $data ] if ref($data) ne 'ARRAY';
	my @els;

	foreach my $element (@{$data}) {
		push @els, $self->_decode_element( $element, $result );
	}

	return \@els;
}

sub _decode_simples {
	my ($self, $data, $result) = @_;
	foreach my $d (@{$data}) {
		#my $name = '';
		#$self->_decode_simple( $name, $d, $data );
	}
}

sub _decode_element {
	my ($self, $data, $result) = @_;
	my $element = {};

	if($data->{'complexType'}) {
		my $name = $self->_random_name;
		$element->{'type'} = $name;
		$self->_decode_complex( $name, delete($data->{'complexType'}), $result );
	} elsif($data->{'_type'}) {
		my ($ns, $type) = split(':', delete($data->{'_type'}));
		$element->{'type'} = $type ? $type : $ns;
	}

	foreach my $key (keys(%{$data})) {
		if($key =~ /^\_(.+)$/) {
			$element->{$1} = $data->{$key};
		}
	}

	return $element;
}

sub _random_name {
	my ($self) = @_;
	my @charset = ( 0 .. 9, 'a' .. 'z', 'A' .. 'Z' );

	my $result = '';

	$result .= $charset[int(rand() * @charset)] for(1..10);

	return $result;
}

package Data::Validate::XSD::ParseXML::Parser;

use base qw(XML::SAX::Base);

sub new
{
	my ($class) = @_;
	my $root = {};
	my $self = bless {
		root    => $root,
		current => $root,
		parents => [ ],
		count   => 0,
	}, $class;
	return $self;
}

sub start_element
{
	my ($self, $node) = @_;

	my $name = $node->{'LocalName'};
	my $atrs = $node->{'Attributes'};
	my $ns   = $node->{'Prefix'};
	my $c    = $self->{'current'};
	my $new  = {};

	if(not $c->{$name}) {
		$c->{$name} = $new;
	} else {
		if(ref($c->{$name}) eq 'ARRAY') {
			push @{$c->{$name}}, $new;
		} else {
			$c->{$name} = [ $c->{$name}, $new ];
		}
	}
	push @{$self->{'parents'}}, $c;
	$self->{'count'}++;
	$self->{'name'} = $name;
	$self->{'parent'} = $c;
	$self->{'current'} = $new;

	foreach my $a (keys(%{$atrs})) {
		my $attribute = $atrs->{$a};
		if($attribute->{'Name'} ne 'xmlns') {
			$self->{'current'}->{'_'.$attribute->{'LocalName'}} = $attribute->{'Value'};
		}
	}

}

sub end_element
{
	my ($self, $element) = @_;
	$self->{'count'}++;
	$self->{'current'} = $self->{'parent'};
	pop @{$self->{'parents'}};
	$self->{'parent'} = $self->{'parents'}->[$#{$self->{'parents'}}];
}

sub characters
{
	my ($self, $text) = @_;
	my $t = $text->{'Data'};
	if($t =~ /\S/) {
		my $p = $self->{'parent'};
		my $c = $p->{$self->{'name'}};
		if(ref($c) eq 'HASH') {
			if(%{$c}) {
				if($c->{'+data'}) {
					$c->{'+data'} .= $t;
				} else {
					$c->{'+data'} = $t;
				}
			} else {
				$p->{$self->{'name'}} = $t;
			}
		} elsif(ref($c) eq 'ARRAY') {
			pop @{$c} if ref($c->[$#{$c}]) eq 'HASH' and not %{$c->[$#{$c}]};
			push @{$c}, $t;
		} else {
			$p->{$self->{'name'}} .= $t;
		}
	}
}


1;