| RDF-Core documentation | Contained in the RDF-Core distribution. |
RDF::Core::Parser - RDF Parser
A module for parsing XML documents containing RDF data. It's based on XML::Parser. Parser goes through XML and calls what is referenced in Assert option for each statement found. CAUTION: If you parse more documents into one model, you need to set distinct BNodePrefix (see below) for each document. This way you avoid mixing anonymous resources from distinct documents together.
require RDF::Core::Parser;
my %options = (Assert => \&handleAssert,
BaseURI => "http://www.foo.com/",
BNodePrefix => "genid"
);
my $parser = new RDF::Core::Parser(%options);
$parser->parseFile('./rdfFile.xml');
#or
$parser->parse($rdfString);
Available options are
A reference to a subroutine, that is called for every assertion that parser generates.
A base URI of parsed document. It will be used to resolve relative URI references.
Blank node identifier is generated as "_:" concatenated with BNodePrefix value concatenated with counter number. Default BnodePrefix is "a".
Deprecated.
Assert handler is called with key value pairs in a parameters array.
Keys are:
namespace, local value and URI of subject
namespace, local value and URI of predicate
namespace, local value and URI of object, if the object is a resource
or
object value for literal, it's language and datatype
This package is subject to the MPL (or the GPL alternatively).
Ginger Alliance, rdf@gingerall.cz
RDF::Core::Model::Parser
| RDF-Core documentation | Contained in the RDF-Core distribution. |
# # The contents of this file are subject to the Mozilla Public # License Version 1.1 (the "License"); you may not use this file # except in compliance with the License. You may obtain a copy of # the License at http://www.mozilla.org/MPL/ # # Software distributed under the License is distributed on an "AS # IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or # implied. See the License for the specific language governing # rights and limitations under the License. # # The Original Code is the RDF::Core module # # The Initial Developer of the Original Code is Ginger Alliance Ltd. # Portions created by Ginger Alliance are # Copyright (C) 2001 Ginger Alliance Ltd. # All Rights Reserved. # # Contributor(s): # # Alternatively, the contents of this file may be used under the # terms of the GNU General Public License Version 2 or later (the # "GPL"), in which case the provisions of the GPL are applicable # instead of those above. If you wish to allow use of your # version of this file only under the terms of the GPL and not to # allow others to use your version of this file under the MPL, # indicate your decision by deleting the provisions above and # replace them with the notice and other provisions required by # the GPL. If you do not delete the provisions above, a recipient # may use your version of this file under either the MPL or the # GPL. # package RDF::Core::Parser; use strict; use warnings; require URI; require XML::Parser; use RDF::Core::Constants qw (:xml :rdf); use Carp; ######################################## # constants use constant PARSE_LITERAL => "Literal"; use constant PARSE_RESOURCE => "Resource"; use constant PARSE_COLLECTION => "Collection"; use constant F_IDLE => 0; use constant F_PARSING => 1; use constant F_HASROOT => 2; ######################################## # rdf types use constant RDFT_UNKNOWN => 0; use constant RDFT_BAG => 1; use constant RDFT_SEQ => 2; use constant RDFT_ALT => 4; use constant RDFT_CONTAINER => RDFT_BAG | RDFT_SEQ | RDFT_ALT; my %RDF_TYPES = (RDF_BAG() => RDFT_BAG, RDF_SEQ() => RDFT_SEQ, RDF_ALT() => RDFT_ALT, ); ######################################## # RDF node types (according to spec.) use constant NODE_UNKNOWN => 0; use constant NODE_RDF => 1; use constant NODE_DESCRIPTION => 2; use constant NODE_BAG => 4; use constant NODE_SEQ => 8; use constant NODE_ALT => 16; use constant NODE_CONTAINER => NODE_BAG | NODE_SEQ | NODE_ALT; use constant NODE_PROPERTY => 32; use constant NODE_PROPERTY_1 => 64; #simple w/ value use constant NODE_PROPERTY_2 => 128; #simple literal use constant NODE_PROPERTY_3 => 256; #parse resource use constant NODE_PROPERTY_4 => 512; #with id/resource use constant NODE_PROPERTY_5 => 4096; #parse collection use constant NODE_PROPERTY_MASK => NODE_PROPERTY_1 | NODE_PROPERTY_2 | NODE_PROPERTY_3 | NODE_PROPERTY_4 | NODE_PROPERTY_5; use constant NODE_TYPED => 1024; use constant NODE_OBJ => NODE_DESCRIPTION | NODE_CONTAINER | NODE_TYPED; our %RDF_TYPECONST = (Bag => NODE_BAG, Seq => NODE_SEQ, Alt => NODE_ALT, ); our %RDF_TYPENAMES = reverse %RDF_TYPECONST; ######################################## # ordinary methods sub new { my ($class, %params) = @_; $class = ref $class || $class; if ($params{InlineURI}) { carp "InlineURI parameter is deprecated, use BNodePrefix instead"; } my $self = { assert => $params{Assert}, baseuri => $params{BaseURI}, bnode => $params{BNodePrefix}, nodeid => {} }; bless $self, $class; return $self; } sub parse { my ($self, $what) = @_; #be careful about the circular reference my $expatParser = $self->_createExpatParser; $self->_setFlag(F_PARSING); $expatParser->parse($what); $self->_clearFlag(F_PARSING); } sub parseFile { my ($self, $filename) = @_; my $expatParser = $self->_createExpatParser; $self->_setFlag(F_PARSING); $expatParser->parsefile($filename); $self->_clearFlag(F_PARSING); } ######################################## # tools # attribure processing sub _getNamespaces { my ($self, $attrs) = @_; my %ret; foreach my $name (keys %$attrs) { if ($name =~ /xmlns:?(.*)/) { my $abbr = $1 || ''; $ret{$abbr} = $$attrs{$name}; delete $$attrs{$name}; } } return \%ret; } sub _expandAttributes { my ($self, $element, %attrs) = @_; my $ret = {}; foreach my $name (keys %attrs) { #expand name and store $name =~ /((.*):|^)(.*)/; my ($ns, $local) = ($2, $3); $ns = $ns ? $self->_findNS($ns) : $element->{ns}; $$ret{$ns . $local} = { name => $local, ns => $ns, # value => $attrs{$name}, }; } return $ret; } sub _getElementResource { my ($self, $element) = @_; undef my $ret; if ($element->{resource}) { $ret = $element->{resource}; } elsif ($element->{nodeid}) { $ret = $self->_getImplicitURI($element->{nodeid}); } elsif ($element->{rnode}) { $ret = $self->_uri($element->{rnode}); } return $ret; } # miscellaneous sub _localName { my ($self, $in) = @_; $in =~ /((.*):|^)(.*)/; return $3; } sub _nsAbbr { my ($self, $in) = @_; $in =~ /((.*):|^)(.*)/; return $2 || ''; } sub _uri { my ($self, $element) = @_; return $element->{uri} || ($element->{uri} = $self->_getImplicitURI($element->{nodeid})); } sub _validFirstLevel { my ($self, $element) = @_; return $element->{qname} eq RDF_DESCRIPTION; } sub _doAssert { my ($self, $subject, $params, $stmt) = @_; my %params = %$params; #ordinary assertion unless ($subject->{abouteach}) { &{$self->{assert}}(%params); } #about each caching else { my $slot = ${$self->{abouteach}}{$subject->{abouturi}}; $slot = ${$self->{abouteach}}{$subject->{abouturi}} = [] unless $slot; my %foo = map {($_, $params{$_})} grep {$_ !~ /^subject/} keys %params; push @$slot, {%foo}; } #reification if ($subject->{bagid} || $stmt) { my $suri = $stmt || $self->_getImplicitURI; my $stmt = { statement_uri => $suri, %params, }; $self->_assertReification($stmt); if ($subject->{bagid}) { my $buri = $subject->{baguri}; $self->{urimembers}{$buri} = [] unless exists $self->{urimembers}{$buri}; push @{$self->{urimembers}{$buri}}, $suri; #collect bag members for later assertion push @{$subject->{bagmembers}}, $suri; } } } sub _assertReification { my ($self, $stmt) = @_; #type my $params = {subject_uri => $stmt->{statement_uri}, predicate_ns => RDF_NS, predicate_name => 'type', predicate_uri => RDF_TYPE, object_ns => RDF_NS, object_name => 'Statement', object_uri => RDF_STATEMENT, }; $self->_doAssert({}, $params); #subject $params = {subject_uri => $stmt->{statement_uri}, predicate_ns => RDF_NS, predicate_name => 'subject', predicate_uri => RDF_SUBJECT, object_uri => $stmt->{subject_uri}, }; $params->{object_ns} = $stmt->{subject_ns} if $stmt->{subject_ns}; $params->{object_name} = $stmt->{subject_name} if $stmt->{subject_name}; $self->_doAssert({}, $params); #predicate $params = {subject_uri => $stmt->{statement_uri}, predicate_ns => RDF_NS, predicate_name => 'predicate', predicate_uri => RDF_PREDICATE, object_uri => $stmt->{predicate_uri}, }; $params->{object_ns} = $stmt->{predicate_ns} if $stmt->{predicate_ns}; $params->{object_name} = $stmt->{predicate_name} if $stmt->{predicate_name}; $self->_doAssert({}, $params); #object $params = {subject_uri => $stmt->{statement_uri}, predicate_ns => RDF_NS, predicate_name => 'object', predicate_uri => RDF_OBJECT, }; if ($stmt->{object_uri}) { $params->{object_uri} = $stmt->{object_uri}; $params->{object_ns} = $stmt->{object_ns} if $stmt->{object_ns}; $params->{object_name} = $stmt->{object_name} if $stmt->{object_name}; } else { $params->{object_literal} = $stmt->{object_literal}; $params->{object_datatype} = $stmt->{object_datatype}; $params->{object_lang} = $stmt->{object_lang}; } $self->_doAssert({}, $params); } sub _assertAttributes { my ($self, $subject, $attrs) = @_; #foreach my $attr (grep {$_->{ns} ne RDF_NS} values %$attrs) { foreach my $attr (values %$attrs) { my $qname = $attr->{ns} . $attr->{name}; #test member for ebaoutEach on ID my $re = '^' . RDF_NS . '_\d+$'; if ($qname =~ /$re/) { #it seems there is nothing to catch for ID, because #this member is literal and has no resource } #assert my %params = ( subject_uri => $self->_uri($subject), predicate_ns => $attr->{ns}, predicate_name => $attr->{name}, predicate_uri => $qname, object_literal => defined $attr->{value} ? $attr->{value} : "", object_lang => $self->_findLang() || "", object_datatype => "", ); $self->_doAssert($subject, \%params); } } sub _assertPropAttrs { my ($self, $element, $attrs) = @_; my $type = $element->{type}; #if (($type == NODE_DESCRIPTION) || ($type == NODE_TYPED)) { if ($type & NODE_OBJ) { my $subject = ${$self->{subjects}}[-1]; $self->_assertAttributes($subject, $attrs); } elsif ($type == NODE_PROPERTY_4) { my $subject = {uri => $self->_getElementResource($element)}; $self->_assertAttributes($subject, $attrs); } } sub _assertRDFAttrs { my ($self, $element, $attrs) = @_; my $type = $element->{type}; if ($type == NODE_TYPED) { $self->_assertRDFTypeElement($element, $element); } elsif ($type & NODE_CONTAINER) { $self->_assertRDFType($element, $type); } elsif ($element->{rdftype}) { $self->_assertRDFTypeString($element, $element->{rdftype}); my $ctype = $RDF_TYPES{$element->{rdftype}}; $element->{containertype} = $ctype if $ctype and RDFT_CONTAINER; } } sub _assertElement { my ($self, $expat, $subject, $element) = @_; my $uri; if ($element->{type} == NODE_PROPERTY_5) { #Collection - prepare assertion for rdf:nil terminator if ($element->{collast}) { $subject = {uri=>$$element{collast}}; $element = {ns=>RDF_NS, name=>'rest', qname=>RDF_REST}; } $uri=RDF_NIL; } else { #Other then collection properties $uri = $self->_getElementResource($element); } if ($element->{resource} && __trim($element->{text})) { $expat->xpcroak("predicate has both of resource and literal"); } if ($element->{datatype} && $uri) { $expat->xpcroak("invalid rdf:datatype use"); } my %object; if ($uri) { %object = (object_uri => $uri); } else { %object = (object_literal => defined $element->{text} ? $element->{text} : "", object_datatype =>$element->{datatype} || "", object_lang => $self->_findLang($element) || "", ); } my %params = ( subject_uri => $self->_uri($subject), predicate_ns => $element->{ns}, predicate_name => $element->{name}, predicate_uri => $element->{qname}, %object, ); $self->_doAssert($subject, \%params, $element->{uri}); } sub _assertRDFType { my ($self, $subject, $type) = @_; my %params = ( subject_uri => $self->_uri($subject), predicate_ns => RDF_NS, predicate_name => 'type', predicate_uri => RDF_TYPE, object_ns => RDF_NS, object_name => $RDF_TYPENAMES{$type}, object_uri => RDF_NS . $RDF_TYPENAMES{$type}, ); $self->_doAssert($subject, \%params); } sub _assertRDFTypeString { my ($self, $subject, $string) = @_; my %params = ( #subject_ns => $subject->{ns}, #subject_name => $subject->{name}, subject_uri => $self->_uri($subject), predicate_ns => RDF_NS, predicate_name => 'type', predicate_uri => RDF_TYPE, object_uri => $string, ); $self->_doAssert($subject, \%params); } sub _assertRDFTypeElement { my ($self, $subject, $element) = @_; my %params = ( #subject_ns => $subject->{ns}, #subject_name => $subject->{name}, subject_uri => $self->_uri($subject), predicate_ns => RDF_NS, predicate_name => 'type', predicate_uri => RDF_TYPE, object_ns => $element->{ns}, object_name => $element->{name}, object_uri => $element->{qname}, ); $self->_doAssert($subject, \%params); } sub _assertAboutEach { my $self = shift; for my $m (keys %{$self->{urimembers}}) { my $members = $self->{urimembers}{$m}; my $abouts = $self->{abouteach}{$m}; next unless $abouts; for my $a (@$abouts) { my %aparams = %$a; for my $s (@$members) { my %params = %aparams; $params{subject_uri} = $s; #assert w/ empty subject $self->_doAssert({}, \%params); } } } } sub _assertCollectionItem { my ($self, $subject, $predicate, $item) = @_; my $colItem = $self->_getImplicitURI; if ($predicate->{collast}) { my %params = ( subject_uri => $predicate->{collast}, predicate_ns => RDF_NS, predicate_name => "rest", predicate_uri => RDF_REST, object_uri => $colItem, ); $self->_doAssert({},\%params); } else { my %params = ( subject_uri => $subject->{uri}, predicate_ns => $predicate->{ns}, predicate_name => $predicate->{name}, predicate_uri => $predicate->{qname}, object_uri => $colItem, ); $self->_doAssert($predicate,\%params, $predicate->{uri}); } my %params = ( subject_uri => $colItem, predicate_ns => RDF_NS, predicate_name => "type", predicate_uri => RDF_TYPE, object_uri => RDF_LIST, ); $self->_doAssert({},\%params); %params = ( subject_uri => $colItem, predicate_ns => RDF_NS, predicate_name => "first", predicate_uri => RDF_FIRST, object_uri => $item->{uri}, ); $self->_doAssert({},\%params); $self->{path}[-1]{collast} = $colItem; } sub _getLIURI { my ($self, $subject) = @_; #rdf:li element can appear outside rdf:Description element #(i.e. $subject can be undef) my $id = "_" . ++($subject || $self)->{li_counter}; return (RDF_NS, $id, RDF_NS . $id); } sub __trim { my $val = shift; $val =~ s/^\s*$//sg if (defined($val)); return $val; } sub __checkParseType { my $element = shift; return unless $element->{parsetype}; my $re = PARSE_LITERAL . "|" . PARSE_RESOURCE . "|" . PARSE_COLLECTION; $element->{parsetype} = PARSE_LITERAL unless $element->{parsetype} =~ /$re/; } sub _updateElement { my ($self, $element, $attrs) = @_; #rdf attributes my $about = delete $$attrs{+RDFA_ABOUT}; my $abouteach = delete $$attrs{+RDFA_ABOUTEACH}; my $id = delete $$attrs{+RDFA_ID}; my $nodeid = delete $$attrs{+RDFA_NODEID}; my $bagid = delete $$attrs{+RDFA_BAGID}; my $parsetype = delete $$attrs{+RDFA_PARSETYPE}; my $rdftype = delete $$attrs{+RDFA_TYPE}; my $datatype = delete $$attrs{+RDFA_DATATYPE}; my $resource = delete $$attrs{+RDFA_RESOURCE}; my $xmllang = delete $$attrs{+XMLA_LANG}; my $xmlbase = delete $$attrs{+XMLA_BASE}; $element->{about} = $about ? $about->{value} : undef; $element->{abouteach} = $abouteach ? $abouteach->{value} : undef; $element->{id} = $id ? $id->{value} : undef; $element->{nodeid} = $nodeid ? $nodeid->{value} : undef; $element->{bagid} = $bagid ? $bagid->{value} : undef; $element->{bagmembers} = []; $element->{parsetype} = $parsetype ? $parsetype->{value} : undef; __checkParseType($element); $element->{rdftype} = $rdftype ? $rdftype->{value} : undef; $element->{datatype} = $datatype ? $datatype->{value} : undef; $element->{resource} = $resource ? $resource->{value} : undef; $element->{lang} = $xmllang ? $xmllang->{value} : undef; $element->{baseuri} = $xmlbase ? $xmlbase->{value} : undef; #create uri/about-uri (from about or id) if (defined $element->{about}) { my $baseURI = new URI($self->_findBaseURI); my $u; if ($element->{about} eq '') { #base uri with fragment removed $u = $baseURI; if ($baseURI->fragment) { my $scheme = $u->scheme; my $opaque = $u->opaque; $u = new URI($opaque); $u->scheme($scheme); } } else { $u = new_abs URI($element->{about}, $baseURI); } $element->{uri} = $u->as_string; } elsif (defined $element->{id}) { my $baseURI = new URI($self->_findBaseURI); my $u = new URI($baseURI); $u->fragment($element->{id}); $element->{uri} = $u->as_string; } elsif ($element->{abouteach}) { my $u = new URI($self->_findBaseURI); #$u->fragment($element->{abouteach}); _fixme_ $element->{abouturi} = $self->_findBaseURI . $element->{abouteach}; } if ($element->{resource}) { my $u = new URI($element->{resource}); $element->{resource} = $u->abs($self->_findBaseURI); } #create bagid uri if ($element->{bagid}) { my $u = new URI($self->_findBaseURI); $u->fragment($element->{bagid}); $element->{baguri} = $u->as_string; } #rename element if it is the rdf:li (I hope it is correct) if ($element->{qname} eq RDF_LI) { my $subject = @{$self->{subjects}} > 0 ? ${$self->{subjects}}[-1] : undef; my ($ns, $name, $uri) = $self->_getLIURI($subject); #we rename the whole element, hopefuly it doesn't matter $element->{ns} = $ns; $element->{name} = $name; $element->{qname} = $uri; } } sub _analyzePath { my ($self, $expat, $attrs) = @_; my $path = $self->{path}; #guess the node type my $ce = $$path[-1]; my $ct; #current type my $re = '^' . RDF_NS . '_(\d+)$'; SWITCH: for ($ce->{qname}) { $_ eq RDF_RDF && do {$ct = NODE_RDF; last SWITCH;}; $_ eq RDF_DESCRIPTION && do {$ct = NODE_DESCRIPTION; last SWITCH;}; $_ eq RDF_BAG && do {$ct = NODE_BAG; $ce->{containertype} = RDFT_BAG; last SWITCH;}; $_ eq RDF_SEQ && do {$ct = NODE_SEQ; $ce->{containertype} = RDFT_SEQ; last SWITCH;}; $_ eq RDF_ALT && do {$ct = NODE_ALT; $ce->{containertype} = RDFT_ALT; last SWITCH;}; #deafult $ct = NODE_UNKNOWN; #for now - property or typed object } #check validity in the context of the parent node #and optionally fix the node type for NODE_UNKNOWN my $pe = $$path[-2]; my $pt = $pe ? $pe->{type} : undef; if (! defined $pt) { #the very beginning $expat->xpcroak("bad root element") unless ($ct & NODE_OBJ) || $ct ==NODE_RDF; if ($ct == NODE_UNKNOWN) { $ct = NODE_TYPED; } } elsif ($pt == NODE_RDF) { $ct = NODE_TYPED if $ct == NODE_UNKNOWN; $expat->xpcroak("invalid first level element") unless $ct & NODE_OBJ; } elsif ($pt == NODE_DESCRIPTION || $pt == NODE_TYPED) { $expat->xpcroak("invalid node in the Description element") unless $ct == NODE_UNKNOWN; #force type to property - checked later $ct = NODE_PROPERTY; } elsif ($pt & NODE_CONTAINER) { $expat->xpcroak("invalid node in container") unless $ct == NODE_UNKNOWN; $ct = NODE_PROPERTY; } elsif ($pt & NODE_PROPERTY_MASK) { $expat->xpcroak("invalid node in the memeber element") unless $ct == NODE_UNKNOWN || ($ct & NODE_OBJ); if ($ct == NODE_UNKNOWN) { if ($pt == NODE_PROPERTY_3) { $ct = NODE_PROPERTY; } else { $ct = NODE_TYPED; } } } else { $expat->xpcroak("unknown parent node type: $pt"); } #if we found, that we're NODE_PROPERTY, we'' try to determine the subtype if ($ct == NODE_PROPERTY) { my $ruri = $self->_getElementResource($ce); if ($ruri || %$attrs) { $ct = NODE_PROPERTY_4; $ce->{resource} ||= $self->_getImplicitURI($ce->{nodeid}); } elsif ($ce->{parsetype}) { if ($ce->{parsetype} eq PARSE_COLLECTION) { $ct = NODE_PROPERTY_5; } elsif ($ce->{parsetype} eq PARSE_RESOURCE) { $ct = NODE_PROPERTY_3; } elsif ($ce->{parsetype} eq PARSE_LITERAL) { $ct = NODE_PROPERTY_2; } } else { $ct = NODE_PROPERTY_1; } } #set node type $ce->{type} = $ct; } sub __slice { my ($element, $keys) = @_; my (%set, $count); foreach (@$keys) { $set{$_} = $element->{$_}; $count++ if defined $set{$_}; } return wantarray ? %set : $count; } sub _checkAttributes { my ($self, $expat, $element, $attrs) = @_; my $allset = [qw(about abouteach id bagid parsetype rdftype resource nodeid datatype)]; my $aboutset = [qw(about abouteach id)]; #all except about and bag my $inverseset1 = [qw(parsetype resource datatype)]; #2 - all except id my $inverseset2 = [qw(about abouteach bagid parsetype rdftype resource nodeid)]; #3 - all except id and parsetype my $inverseset3 = [qw(about abouteach bagid rdftype resource nodeid datatype)]; #4 - all except resource, id and bag my $inverseset4 = [qw(about abouteach parsetype rdftype datatype)]; my $et = $element->{type}; #check xml attributes (shouldn't be any) if (grep {$_->{ns} eq XML_NS} values %$attrs) { $expat->xpcroak("invalid xml attribute"); } if ($et == NODE_RDF) { $expat->xpcroak("invalid attribute") if scalar __slice($element, $allset) || %$attrs; } elsif ($et == NODE_DESCRIPTION || $et == NODE_TYPED) { $expat->xpcroak("invalid attribute") if scalar __slice($element, $inverseset1); $expat->xpcroak("invalid attribute") if scalar __slice($element, $aboutset) && $element->{nodeid}; } elsif ($et & NODE_CONTAINER) { $expat->xpcroak("invalid attribute") if scalar __slice($element, $inverseset1); $element->{hasmembers} = 1 if %$attrs; } elsif ($et == NODE_PROPERTY_1) { $expat->xpcroak("invalid attribute") if scalar __slice($element, $inverseset2) || %$attrs; } elsif ($et == NODE_PROPERTY_2 || $et == NODE_PROPERTY_3 || $et == NODE_PROPERTY_5) { $expat->xpcroak("invalid attribute") if scalar __slice($element, $inverseset3) || %$attrs; } elsif ($et == NODE_PROPERTY_4) { $expat->xpcroak("invalid attribute") if scalar __slice($element, $inverseset4); $element->{hasprops} = 1 if %$attrs; } } sub _checkNoResource { my ($self, $expat, $element) = @_; $expat->xpcroak("element contain both of rdf:resource and nested node") if $element->{resource}; $expat->xpcroak("element contain both of rdf:nodeID and nested node") if $element->{nodeid}; } #creates the 'current' subject sub _createSubject { my ($self, $expat, $element) = @_; my $type = $element->{type}; if ($type & NODE_OBJ) { push @{$self->{subjects}}, $element; $element->{subject} = 1; my $parent = ${$self->{path}}[-2]; if ($parent && $parent->{type} != NODE_RDF) { $self->_checkNoResource($expat, $parent); $parent->{rnode} = $element; } } elsif ($type == NODE_PROPERTY_3) { #rdf:parseType="Resource" my $subject = {uri => $self->_getImplicitURI}; push @{$self->{subjects}}, $subject; $element->{presubject} = 1; $self->_checkNoResource($element); $element->{rnode} = $subject; } } ######################################## # handlers sub init { my ($self, $expat) = @_; # print "---> init\n"; $self->{path} = []; $self->{subjects} = []; $self->{status} = F_IDLE; $self->{unique} = 0; $self->{urimembers} = {}; $self->{abouteach} = {}; } sub final { my ($self, $expat) = @_; # print "---> final\n"; $self->_assertAboutEach; # print "subjects: ", Dumper($self->{subjects}); # print "urimembers: ", Dumper($self->{urimembers}); # print "abouteachs: ", Dumper($self->{abouteach}); } sub start { my ($self, $expat, $name, %attrs) = @_; my $element; my $subject; # extract namespace declarations and create element push @{$self->{path}}, $element = { name => $self->_localName($name), nslist => $self->_getNamespaces(\%attrs), members => [], }; $element->{ns} = $self->_findNS($self->_nsAbbr($name)); $element->{qname} = $element->{ns} . $element->{name}; #expand attributes (must follow namespaces handling) my $attrs = $self->_expandAttributes($element, %attrs); #update element (rename, read red attributes) $self->_updateElement($element, $attrs); #now we have all (almost) information to decide on node type #we must check the validity and update element status $self->_analyzePath($expat, $attrs); #check whether attributes match the node type $self->_checkAttributes($expat, $element, $attrs); # tool variables #c'on baby... $self->_createSubject($expat, $element); #spit out attributes $self->_assertPropAttrs($element, $attrs); $self->_assertRDFAttrs($element, $attrs); #switch to the literal mode if needed if ($element->{type} == NODE_PROPERTY_2) { $element->{datatype} = RDF_XMLLITERAL; $expat->setHandlers(%{$self->_getHandlersLiteral($name)}); } } sub end { my ($self, $expat, $name) = @_; my $element = pop @{$self->{path}}; pop @{$self->{subjects}} if $element->{presubject}; #remember current subject my $subject = $self->{subjects}[-1]; if (($element->{type} & NODE_PROPERTY_MASK)) { $self->_assertElement($expat, $subject, $element); #update parent type (usefull for containers) if ($element->{qname} eq RDF_TYPE) { my $ctype = $RDF_TYPES{$self->_getElementResource($element)}; if ($ctype and $ctype & RDFT_CONTAINER) { $subject->{containertype} = $ctype; } } #catch memebers for aboutEach on ID my $re = '^' . RDF_NS . '_\d+$'; if ($element->{qname} =~ /$re/) { my $uri = $self->_getElementResource($element); push @{$subject->{members}}, $uri if $uri; } } if ($element->{subject}) { #Collection item if ($self->{path}[-1] && $self->{path}[-1]->{type} == NODE_PROPERTY_5) { $self->_assertCollectionItem($self->{subjects}[-2], $self->{path}[-1], $element ); } #remember aboutEach stuff if ( $element->{containertype}) { $self->{urimembers}{$element->{uri}} = $element->{members}; } if ($element->{bagid}) { #assert bags created by rdf:bagID attr my $bagElement = {uri=>$element->{baguri}}; $self->_assertRDFType($bagElement,NODE_BAG); foreach (@{$element->{bagmembers}}) { my ($ns, $name, $uri) = $self->_getLIURI($bagElement); my %params = ( subject_uri => $self->_uri($bagElement), predicate_ns => $ns, predicate_name => $name, predicate_uri => $uri, object_uri => $_, ); $self->_doAssert($bagElement,\%params); } } } pop @{$self->{subjects}} if $element->{subject}; } sub char { my ($self, $expat, $string) = @_; my $elt = ${$self->{path}}[-1]; if ($string !~ /^\s*$/s && $elt->{type} != NODE_PROPERTY_1) { $expat->xpcroak("Element '" . $elt->{name} . "' can not contain a literal value"); } $elt->{text} .= $string; } sub start_literal { my ($self, $expat, $name, %attrs) = @_; ${$self->{path}}[-1]{text} .= $expat->recognized_string; } sub end_literal { my ($self, $fname, $expat, $name) = @_; if ($name eq $fname) { $expat->setHandlers(%{$self->_getHandlersRegular}); $self->end($expat, $name); } else { ${$self->{path}}[-1]{text} .= $expat->recognized_string; } } sub char_literal { my ($self, $expat, $string) = @_; ${$self->{path}}[-1]{text} .= $string; } ######################################## # private methods sub _findNS { my ($self, $abbr) = @_; return XML_NS if $abbr eq 'xml'; foreach my $element (reverse @{$self->{path}}) { return $element->{nslist}{$abbr} if exists $element->{nslist}{$abbr}; } } sub _findBaseURI { my ($self) = @_; my $baseURI = $self->{baseuri}; foreach my $element (reverse @{$self->{path}}) { if (defined $element->{baseuri}) { $baseURI = $element->{baseuri}; last; } } return $baseURI; } sub _findLang { my ($self, $lastElement) = @_; my $lang = $self->{lang}; foreach my $element ($lastElement, reverse @{$self->{path}}) { next unless defined $element; if (defined $element->{lang}) { $lang = $element->{lang}; last; } } return $lang; } sub _getHandlers { my $self = shift; my %handlers = ( Init => sub {$self->init(@_)}, Final => sub {$self->final(@_)}, Start => sub {$self->start(@_)}, End => sub {$self->end(@_)}, Char => sub {$self->char(@_)}, ); return \%handlers; } sub _getHandlersLiteral { my ($self, $name) = @_; my %handlers = ( Start => sub {$self->start_literal(@_)}, End => sub {$self->end_literal($name, @_)}, Char => sub {$self->char_literal(@_)}, ); return \%handlers; } sub _getHandlersRegular { my ($self, $name) = @_; my %handlers = ( Start => sub {$self->start(@_)}, End => sub {$self->end(@_)}, Char => sub {$self->char(@_)}, ); return \%handlers; } sub _createExpatParser { my $self = shift; my $expat = new XML::Parser( Handlers => $self->_getHandlers, ); return $expat; } sub _setFlag { my ($self, $flag) = @_; $self->{status} |= $flag; } sub _hasFlag { my ($self, $flag) = @_; return $self->{status} & $flag; } sub _clearFlag { my ($self, $flag) = @_; $self->{status} &= ~$flag; } sub _getImplicitURI { my ($self, $nodeID) = @_; my $ret; $ret = "_:" .($self->{bnode} || "a"); $ret .= ++ $self->{unique}; if ($nodeID) { if ($self->{nodeid}{$nodeID}) { #use known node ID instead $ret = $self->{nodeid}{$nodeID} } else { #remember node ID $self->{nodeid}{$nodeID} = $ret } } return $ret; } 1; __END__