/usr/local/CPAN/RDF-Notation3/RDF/Notation3.pm
package RDF::Notation3;
require 5.005_62;
use strict;
#use warnings;
use vars qw($VERSION);
use File::Spec::Functions ();
use Carp;
use RDF::Notation3::ReaderFile;
use RDF::Notation3::ReaderString;
$VERSION = '0.91';
############################################################
sub new {
my ($class) = @_;
my $self = {
ansuri => '#',
quantif => 1,
nIDpref => '_:a', # this fits to RDF:Core prefix for nodeID
};
bless $self, $class;
return $self;
}
sub parse_file {
my ($self, $path) = @_;
$self->_define;
my $fh;
if (ref $path eq 'IO::File') {
$fh = $path;
} else {
open(FILE, "$path") or $self->_do_error(2, $path);
$fh = *FILE;
}
my $t = new RDF::Notation3::ReaderFile($fh);
$self->{reader} = $t;
$self->_document;
close (FILE);
}
sub parse_string {
my ($self, $str) = @_;
$self->_define;
my $t = new RDF::Notation3::ReaderString($str);
$self->{reader} = $t;
$self->_document;
}
sub anonymous_ns_uri {
my ($self, $uri) = @_;
if (@_ > 1) {
$self->{ansuri} = $uri;
} else {
return $self->{ansuri};
}
}
sub quantification {
my ($self, $val) = @_;
if (@_ > 1) {
$self->_do_error(4, $val)
unless $val == 1 || $val == 0;
$self->{quantif} = $val;
} else {
return $self->{quantif};
}
}
sub _define {
my ($self) = @_;
$self->{ns} = {};
$self->{context} = '<>';
$self->{gid} = 1;
$self->{cid} = 1;
$self->{hardns} = {
rdf => ['rdf','http://www.w3.org/1999/02/22-rdf-syntax-ns#'],
daml => ['daml','http://www.daml.org/2001/03/daml+oil#'],
log => ['log','http://www.w3.org/2000/10/swap/log.n3#'],
};
$self->{keywords} = [];
}
sub _document {
my ($self) = @_;
my $next = $self->{reader}->try;
#print ">doc starts: $next\n";
if ($next ne ' EOF ') {
$self->_statement_list;
}
#print ">end\n";
}
sub _statement_list {
my ($self) = @_;
my $next = $self->_eat_EOLs;
#print ">statement list: $next\n";
while ($next ne ' EOF ') {
if ($next =~ /^(?:|#.*)$/) {
$self->_space;
} elsif ($next =~ /^}/) {
#print ">end of nested statement list: $next\n";
last;
} else {
$self->_statement;
}
$next = $self->_eat_EOLs;
}
#print ">end of statement list: $next\n";
}
sub _space {
my ($self) = @_;
#print ">space: ";
my $tk = $self->{reader}->get;
# comment or empty string
while ($tk ne ' EOL ') {
#print ">$tk ";
$tk = $self->{reader}->get;
}
#print ">\n";
}
sub _statement {
my ($self, $subject) = @_;
my $next = $self->{reader}->try;
#print ">statement starts: $next\n";
if ($next =~ /^\@prefix|\@keywords|bind$/) {
$self->_directive;
} else {
$subject = $self->_node unless $subject;
#print ">subject: $subject\n";
my $properties = [];
$self->_property_list($properties);
#print ">CONTEXT: $self->{context}\n";
#print ">SUBJECT: $subject\n";
#print ">PROPERTY: void\n" unless @$properties;
#foreach (@$properties) { # comment/uncomment by hand
#print ">PROPERTY: ", join('-', @$_), "\n";
#}
$self->_process_statement($subject, $properties) if @$properties;
}
# next step
$next = $self->_eat_EOLs;
if ($next eq '.') {
$self->{reader}->get;
} elsif ($next =~ /^\.(.*)$/) {
$self->{reader}->get;
unshift @{$self->{reader}->{tokens}}, $1;
} elsif ($next =~ /^(?:\]|\)|\})/) {
} else {
$self->_do_error(115,$next);
}
}
sub _node {
my ($self) = @_;
my $next = $self->_eat_EOLs;
#print ">node: $next\n";
if ($next =~ /^[\[\{\(]/) {
#print ">node is anonnode\n";
return $self->_anonymous_node;
} elsif ($next eq 'this') {
#print ">this\n";
$self->{reader}->get;
return "$self->{context}";
} elsif ($next =~ /^(<[^>]*>|^(?:[_a-zA-Z]\w*)?:[_a-zA-Z][_\w]*)(.*)$/) {
#print ">node is uri_ref2: $next\n";
if ($2) {
$self->{reader}->get;
unshift @{$self->{reader}->{tokens}}, $2;
unshift @{$self->{reader}->{tokens}}, $1;
#print ">cleaned uri_ref2: $1\n";
}
return $self->_uri_ref2;
} elsif ($self->{keywords}[0] && ($next =~ /^(^[_a-zA-Z][_\w]*)(.*)$/)) {
#print ">node is uri_ref_kw: $next\n";
$self->{reader}->get;
unshift @{$self->{reader}->{tokens}}, $2 if $2;
unshift @{$self->{reader}->{tokens}}, ':' . $1;
#print ">cleaned uri_ref2: $1\n";
return $self->_uri_ref2;
} else {
#print ">unknown node: $next\n";
$self->_do_error(116,$next);
}
}
sub _directive {
my ($self) = @_;
my $tk = $self->{reader}->get;
#print ">directive: $tk\n";
if ($tk eq '@prefix') {
my $tk = $self->{reader}->get;
if ($tk =~ /^([_a-zA-Z]\w*)?:$/) {
my $pref = $1;
#print ">nprefix: $pref\n" if $pref;
my $ns_uri = $self->_uri_ref2;
$ns_uri =~ s/^<(.*)>$/$1/;
if ($pref) {
$self->{ns}->{$self->{context}}->{$pref} = $ns_uri;
} else {
$self->{ns}->{$self->{context}}->{''} = $ns_uri;
}
} else {
$self->_do_error(102,$tk);
}
} elsif ($tk eq '@keywords') {
my $kw = $self->{reader}->get;
while ($kw =~ /,$/) {
$kw =~ s/,$//;
push @{$self->{keywords}}, $kw;
$kw = $self->{reader}->get;
}
if ($kw =~ /^(.+)\.$/) {
push @{$self->{keywords}}, $1;
unshift @{$self->{reader}{tokens}}, '.';
} else {
$self->_do_error(117,$tk);
}
#print ">keywords: ", join('|', @{$self->{keywords}}), "\n";
} else {
$self->_do_error(101,$tk);
}
}
sub _uri_ref2 {
my ($self) = @_;
# possible end of statement, a simple . check is done
my $next = $self->{reader}->try;
if ($next =~ /^(.+)\.$/) {
$self->{reader}->{tokens}->[0] = '.';
unshift @{$self->{reader}->{tokens}}, $1;
}
my $tk = $self->{reader}->get;
#print ">uri_ref2: $tk\n";
if ($tk =~ /^<[^>]*>$/) {
#print ">URI\n";
return $tk;
} elsif ($tk =~ /^([_a-zA-Z]\w*)?:[a-zA-Z]\w*$/) {
#print ">qname ($1:)\n" if $1;
my $pref = '';
$pref = $1 if $1;
if ($pref eq '_') { # workaround to parse N-Triples
$self->{ns}->{$self->{context}}->{_} = $self->{ansuri}
unless $self->{ns}->{$self->{context}}->{_};
}
# Identifier demunging
$tk = _unesc_qname($tk) if $tk =~ /_/;
return $tk;
} else {
$self->_do_error(103,$tk);
}
}
sub _property_list {
my ($self, $properties) = @_;
my $next = $self->_eat_EOLs;
#print ">property list: $next\n";
$next = $self->_check_inline_comment($next);
if ($next =~ /^:-/) {
#print ">anonnode\n";
# TBD
$self->_do_error(199, $next);
} elsif ($next =~ /^\./) {
#print ">void prop_list\n";
# TBD
} else {
#print ">prop_list with verb\n";
my $property = $self->_verb;
#print ">property is back: $property\n";
my $objects = [];
$self->_object_list($objects);
unshift @$objects, $property;
unshift @$objects, 'i' if ($next eq 'is' or $next eq '<-');
#print ">inverse mode\n" if ($next eq 'is' or $next eq '<-');
push @$properties, $objects;
}
# next step
$next = $self->_eat_EOLs;
if ($next eq ';') {
$self->{reader}->get;
$self->_property_list($properties);
}
}
sub _verb {
my ($self) = @_;
my $next = $self->{reader}->try;
#print ">verb: $next\n";
if ($next eq 'has') {
$self->{reader}->get;
return $self->_node;
} elsif ($next eq '>-') {
$self->{reader}->get;
my $node = $self->_node;
my $tk = $self->{reader}->get;
$self->_do_error(104,$tk) unless $tk eq '->';
return $node;
} elsif ($next eq 'is') {
$self->{reader}->get;
my $node = $self->_node;
my $tk = $self->{reader}->get;
$self->_do_error(109,$tk) unless $tk eq 'of';
return $node;
} elsif ($next eq '<-') {
$self->{reader}->get;
my $node = $self->_node;
my $tk = $self->{reader}->get;
$self->_do_error(110,$tk) unless $tk eq '-<';
return $node;
} elsif ($next eq 'a') {
$self->{reader}->get;
return $self->_built_in_verb('rdf','type');
# return '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>'
} elsif ($next =~ /^=(.*)/) {
$self->{reader}->get;
unshift @{$self->{reader}->{tokens}}, $1 if $1;
return $self->_built_in_verb('daml','equivalentTo');
# return '<http://www.daml.org/2001/03/daml+oil#equivalentTo>';
} else {
#print ">property: $next\n";
return $self->_node;
}
}
sub _object_list {
my ($self, $objects) = @_;
my $next = $self->_eat_EOLs;
#print ">object list: $next\n";
$next = $self->_check_inline_comment($next);
# possible end of entity, check for sticked next char is done
while ($next =~ /^([^"]+)([,;\.\}\]\)])$/) {
$self->{reader}->{tokens}->[0] = $2;
unshift @{$self->{reader}->{tokens}}, $1;
$next = $1;
}
my $obj = $self->_object;
#print ">object is back: $obj\n";
push @$objects, $obj;
# next step
$next = $self->_eat_EOLs;
if ($next eq ',') {
$self->{reader}->get;
$self->_object_list($objects);
}
}
sub _object {
my ($self) = @_;
my $next = $self->_eat_EOLs;
#print ">object: $next:\n";
if ($next =~ /^("(?:\\"|[^\"])*")([\.;,\]\}\)])*$/) {
#print ">complete string1: $next\n";
my $tk = $self->{reader}->get;
unshift @{$self->{reader}->{tokens}}, $2 if $2;
return $self->_unesc_string($1);
} else {
#print ">object is node: $next\n";
$self->_node;
}
}
sub _anonymous_node {
my ($self) = @_;
my $next = $self->{reader}->try;
$next =~ /^([\[\{\(])(.*)$/;
#print ">anonnode1: $1\n";
#print ">anonnode2: $2\n";
$self->{reader}->get;
unshift @{$self->{reader}->{tokens}}, $2 if $2;
if ($1 eq '[') {
#print ">anonnode: []\n";
my $genid = "<$self->{ansuri}g_$self->{gid}>";
$self->{gid}++;
$next = $self->_eat_EOLs;
if ($next =~ /^\](.)*$/) {
$self->_exist_quantif($genid);
} else {
$self->_exist_quantif($genid);
$self->_statement($genid);
}
# next step
$next = $self->_eat_EOLs;
my $tk = $self->{reader}->get;
if ($tk =~ /^\](.+)$/) {
unshift @{$self->{reader}->{tokens}}, $1;
} elsif ($tk ne ']') {
$self->_do_error(107, $tk);
}
return $genid;
} elsif ($1 eq '{') {
#print ">anonnode: {}\n";
my $genid = "<$self->{ansuri}c_$self->{cid}>";
$self->{cid}++;
# ns mapping is passed to inner context
$self->{ns}->{$genid} = {};
foreach (keys %{$self->{ns}->{$self->{context}}}) {
$self->{ns}->{$genid}->{$_} =
$self->{ns}->{$self->{context}}->{$_};
#print ">prefix '$_' passed to inner context\n";
}
my $parent_context = $self->{context};
$self->{context} = $genid;
$self->_exist_quantif($genid); # quantifying the new context
$self->_statement_list; # parsing nested statements
$self->{context} = $parent_context;
# next step
$self->_eat_EOLs;
my $tk = $self->{reader}->get;
#print ">next token: $tk\n";
if ($tk =~ /^\}([,;\.\]\}\)])?$/) {
unshift @{$self->{reader}->{tokens}}, $1 if $1;
} else {
$self->_do_error(108, $tk);
}
return $genid;
} else {
#print ">anonnode: ()\n";
my $next = $self->_eat_EOLs;
# if ($next =~ /^\)([,;\.\]\}\)])*$/) {
if ($next =~ /^\)(.*)$/) {
#print ">void ()\n";
$self->{reader}->get;
unshift @{$self->{reader}->{tokens}}, $1 if $1;
return $self->_built_in_verb('daml','nil');
} else {
#print ">anonnode () starts: $next\n";
my @nodes = ();
until ($next =~ /^.*\)[,;\.\]\}\)]*$/) {
push @nodes, $self->_object;
$next = $self->_eat_EOLs;
}
if ($next =~ /^([^)]*)\)([,;\.\]\}\)]*)$/) {
$self->{reader}->get;
unshift @{$self->{reader}->{tokens}}, $2 if $2;
unshift @{$self->{reader}->{tokens}}, ')';
if ($1) {
unshift @{$self->{reader}->{tokens}}, $1;
push @nodes, $self->_object;
}
$self->{reader}->get;
}
my $pref = $self->_built_in_verb('daml','');
my $i = 0;
my @expnl = (); # expanded node list
foreach (@nodes) {
$i++;
push @expnl, '[';
push @expnl, $pref . 'first';
push @expnl, $_;
push @expnl, ';';
push @expnl, $pref . 'rest';
push @expnl, $pref . 'nil'
if $i == scalar @nodes;
}
for (my $j = 0; $j < $i; $j++) {push @expnl, ']'}
unshift @{$self->{reader}->{tokens}}, @expnl;
my $exp = join(' ', @expnl);
#print ">expanded: $exp\n";
my $genid = $self->_anonymous_node;
return $genid;
}
}
}
########################################
# utils
sub _exist_quantif {
my ($self, $anode) = @_;
if ($self->{quantif}) {
my $qname = $self->_built_in_verb('log','forSome');
#print ">existential quantification: $anode\n";
#print ">CONTEXT: $self->{context}\n";
#print ">SUBJECT: $self->{context}\n";
#print ">PROPERTY: $qname";
#print ">-$anode\n";
$self->_process_statement($self->{context},
[[$qname, $anode]]);
}
}
sub _eat_EOLs {
my ($self) = @_;
my $next = $self->{reader}->try;
while ($next eq ' EOL ') {
$self->{reader}->get;
$next = $self->{reader}->try;
}
return $next;
}
# comment inside a list
sub _check_inline_comment {
my ($self, $next) = @_;
if ($next =~ /^#/) {
$self->_space;
$next = $self->_eat_EOLs;
}
return $next;
}
sub _built_in_verb {
my ($self, $key, $verb) = @_;
# resolves possible NS conflicts
my $i = 1;
while ($self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} and
$self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} ne
$self->{hardns}->{$key}->[1]) {
$self->{hardns}->{$key}->[0] = "$key$i";
$i++;
}
# adds prefix-NS binding
$self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} =
$self->{hardns}->{$key}->[1];
return "$self->{hardns}->{$key}->[0]:$verb";
}
sub _unesc_qname {
my $qname = shift;
#print ">escaped qname: $qname\n";
my $i = 0;
my @unesc = ();
while ($qname =~ /(__+)/) {
my $res = substr(sprintf("%b", length($1) + 1), 1);
$res =~ s/1/-/g;
$res =~ s/0/_/g;
$qname =~ s/__+/<$i>/;
push @unesc, $res;
$i++;
}
for ($i=0; $i<@unesc; $i++) { $qname =~ s/<$i>/$unesc[$i]/; }
#print ">unescaped qname: $qname\n";
return $qname;
}
sub _unesc_string {
my ($self, $str) = @_;
$str =~ s/\\\n//go;
$str =~ s/\\\\/\\/go;
$str =~ s/\\'/'/go;
$str =~ s/\\"/"/go;
$str =~ s/\\n/\n/go;
$str =~ s/\\r/\r/go;
$str =~ s/\\t/\t/go;
$str =~ s/\\u([\da-fA-F]{4})/pack('U',hex($1))/ge;
$str =~ s/\\U00([\da-fA-F]{6})/pack('U',hex($1))/ge;
$str =~ s/\\([\da-fA-F]{3})/pack('C',oct($1))/ge; #deprecated
$str =~ s/\\x([\da-fA-F]{2})/pack('C',hex($1))/ge; #deprecated
return $str;
}
########################################
sub _do_error {
my ($self, $n, $tk) = @_;
my %msg = (
1 => 'file not specified',
2 => 'file not found',
3 => 'string not specified',
4 => 'invalid parameter of quantification method (0|1)',
101 => 'bind directive is obsolete, use @prefix instead',
102 => 'invalid namespace prefix',
103 => 'invalid URI reference (uri_ref2)',
104 => 'end of verb (->) expected',
105 => 'invalid characters in string1',
106 => 'namespace prefix not bound',
107 => 'invalid end of anonnode, ] expected',
108 => 'invalid end of anonnode, } expected',
109 => 'end of verb (of) expected',
110 => 'end of verb (-<) expected',
111 => 'string1 ("...") is not terminated',
112 => 'invalid characters in string2',
113 => 'string2 ("""...""")is not terminated',
114 => 'string1 ("...") can\'t include newlines',
115 => 'end of statement expected',
116 => 'invalid node',
117 => 'last keyword expected',
199 => ':- token not supported yet',
201 => '[Triples] attempt to add invalid node',
202 => '[Triples] literal not allowed as subject or predicate',
#301 => '[SAX] systemID source not implemented',
302 => '[SAX] characterStream source not implemented',
401 => '[XML] unable to convert URI predicate to QName',
402 => '[XML] subject not recognized - internal error',
501 => '[RDFCore] literal not allowed as subject',
502 => '[RDFCore] valid storage not specified',
503 => '[RDFStore] literal not allowed as subject',
);
my $msg = "[Error $n]";
$msg .= " line $self->{reader}->{ln}, token" if $n > 100;
$msg .= " \"$tk\"\n";
$msg .= "$msg{$n}!\n";
croak $msg;
}
1;