RDF::Trine::Serializer::NTriples::Canonical - Canonical representation of an RDF model


RDF-Trine documentation Contained in the RDF-Trine distribution.

Index


Code Index:

NAME

Top

RDF::Trine::Serializer::NTriples::Canonical - Canonical representation of an RDF model

VERSION

Top

This document describes RDF::Trine::Serializer::NTriples::Canonical version 0.135

SYNOPSIS

Top

  use RDF::Trine::Serializer::NTriples::Canonical;
  my $serializer = RDF::Trine::Serializer::NTriples::Canonical->new( onfail=>'truncate' );
  $serializer->serialize_model_to_file(FH, $model);

DESCRIPTION

Top

This module produces a canonical string representation of an RDF graph. If the graph contains blank nodes, sometimes there is no canonical representation that can be produced. The 'onfail' option allows you to decide what is done in those circumstances:

* truncate - drop problematic triples and only serialize a subgraph.
* append - append problematic triples to the end of graph. The result will be non-canonical. This is the default behaviour.
* space - As with 'append', but leave a blank line separating the canonical and non-canonical portions of the graph.
* die - cause a fatal error.

Other than the 'onfail' option, this package has exactly the same interface as RDF::Trine::Serializer::NTriples, providing serialize_model_to_file and serialize_model_to_string methods.

This package will be considerably slower than the non-canonicalising serializer though, so should only be used for small to medium-sized graphs, and only when you need canonicalisation (e.g. for side-by-side comparison of two graphs to check they're isomorphic; or creating a canonical representation for digital signing).

METHODS

Top

Beyond the methods documented below, this class inherits methods from the RDF::Trine::Serializer::NTriples class.

new ( [ onfail => $rule ] )

Returns a new Canonical N-Triples serializer object. If specified, the value of the 'onfail' argument dictates the handling of blank nodes with no canonical representation. The allowable rule values are 'truncate', 'append', 'space', and 'die', and their respective behaviour is described in DESCRIPTION above.

serialize_model_to_file ( $fh, $model )

Serializes the $model to canonical NTriples, printing the results to the supplied filehandle $fh.

serialize_model_to_string ( $model )

Serializes the $model to canonical NTriples, returning the result as a string.

BUGS

Top

Please report any bugs to <http://rt.cpan.org/>.

SEE ALSO

Top

Signing RDF Graphs, Jeremy J. Carroll, Digital Media Systems Laboratory, HP Laboratories Bristol. HPL-2003-142, 23 July 2003. http://www.hpl.hp.com/techreports/2003/HPL-2003-142.pdf.

RDF::Trine, RDF::Trine::Serializer::NTriples.

http://www.perlrdf.org/.

AUTHOR

Top

Toby Inkster, <tobyink@cpan.org>

COPYRIGHT AND LICENSE

Top


RDF-Trine documentation Contained in the RDF-Trine distribution.
package RDF::Trine::Serializer::NTriples::Canonical;

use 5.008001;
use strict;
use warnings;

use Carp;
use RDF::Trine;
use base qw(RDF::Trine::Serializer::NTriples);

######################################################################

# Package version; assigned inside BEGIN so it is already set at compile time.
our ($VERSION);
BEGIN {
	$VERSION	= '0.135';
	# Register this class under the name 'ntriples-canonical' in the
	# serializer-name table kept by RDF::Trine::Serializer.
	$RDF::Trine::Serializer::serializer_names{ 'ntriples-canonical' }	= __PACKAGE__;
	# Media-type registration is intentionally left disabled below.
	# NOTE(review): presumably so that text/plain keeps mapping to another
	# serializer -- confirm before re-enabling.
# 	foreach my $type (qw(text/plain)) {
# 		$RDF::Trine::Serializer::media_types{ $type }	= __PACKAGE__;
# 	}
}

######################################################################

# Constructor. Accepts a flat list of option name/value pairs; option names
# are folded to lower case, values are stored untouched. The resulting
# option hash is blessed into $class and returned.
sub new {
	my ($class, @pairs) = @_;
	
	my %options;
	while (@pairs) {
		# Take the next name/value pair; a dangling name gets an undef value.
		my ($name, $setting) = splice(@pairs, 0, 2);
		$options{ lc $name } = $setting;
	}
	
	return bless \%options, $class;
}

# Writes the canonical N-Triples form of $model to the filehandle $file.
# The text is produced by serialize_model_to_string; the return value is
# whatever print reports for the write.
sub serialize_model_to_file {
	my ($self, $file, $model) = @_;
	
	# Build the complete document first, then emit it in a single write.
	my $ntriples = $self->serialize_model_to_string($model);
	return print {$file} $ntriples;
}

# Serializes $model to canonical N-Triples and returns the result as a string.
#
# Parameters:
#   $model - object providing as_stream(); every statement yielded by the
#            stream must offer subject, predicate and object accessors whose
#            nodes respond to sse() (and blank_identifier() for blank nodes).
#
# Returns: the serialized graph, one statement per line, each line ending
# in " .\r\n".
#
# Croaks with "Model could not be canonicalised" when the 'onfail' option is
# 'die' and at least one statement cannot be given a canonical form.
#
# Outline:
#   1. Render every statement with each blank node written as '~'.
#   2. Sort; statements whose rendering is unique hand out "_:gN" labels to
#      the blank nodes they mention (object first, then subject).
#   3. Propagate the labels assigned so far into all other statements.
#   4. Sort again. Statements that still contain '~' are non-canonical and
#      are handled according to 'onfail' ('die', 'truncate', 'space', or the
#      default 'append'); when kept they receive "_:hN" labels.
sub serialize_model_to_string {
	my $self  = shift;
	my $model = shift;
	
	# Normalise the option once. It is legitimately absent (the documented
	# default is 'append'), so it must not reach lc() as undef.
	my $onfail = defined($self->{'onfail'}) ? lc($self->{'onfail'}) : 'append';
	
	my $blankClass = 'RDF::Trine::Node::Blank';
	my %blankNodes;
	my @statements;
	
	my $stream = $model->as_stream;
	while (my $ST = $stream->next) {
		push @statements, { 'trine' => $ST };
		
		foreach my $node (scalar($ST->subject), scalar($ST->object)) {
			next unless $node->isa($blankClass);
			$blankNodes{ $node->blank_identifier }->{'trine'} = $node;
		}
	}
	
	my %lexCounts;
	
	foreach my $st (@statements) {
		# Really need to canonicalise typed literals as per XSD.
		
		my $triple  = $st->{'trine'};
		my $subject = $triple->subject;
		my $object  = $triple->object;
		
		$st->{'lex'} = sprintf('%s %s %s',
			($subject->isa($blankClass) ? '~' : $subject->sse),
			$triple->predicate->sse,
			($object->isa($blankClass) ? '~' : $object->sse)
			);
		$lexCounts{ $st->{'lex'} }++;
	}

	# Zero-pad generated labels to the width of the blank node count so
	# that they sort consistently as strings.
	my $blankNodeCount   = scalar keys %blankNodes;
	my $blankNodeLength  = length "$blankNodeCount";
	my $blankNodePattern = '_:g%0'.$blankNodeLength.'d';
	my $hardNodePattern  = '_:h%0'.$blankNodeLength.'d';
	
	my $genSymCounter = 1;
	
	# Returns the label of a blank node. When $pattern is given and the node
	# has no label yet, a fresh one is generated from the shared counter;
	# without $pattern an unlabelled node yields undef.
	my $label_for = sub {
		my ($node, $pattern) = @_;
		my $entry = ($blankNodes{ $node->blank_identifier } ||= {});
		if (defined($pattern) && !defined($entry->{'lex'})) {
			$entry->{'lex'} = sprintf($pattern, $genSymCounter);
			$genSymCounter++;
		}
		return $entry->{'lex'};
	};
	
	# Replaces the '~' placeholders of one statement by node labels. The
	# object is handled before the subject; that order decides which node
	# receives the lower number and therefore must not change.
	my $relabel = sub {
		my ($st, $pattern) = @_;
		
		my $object = $st->{'trine'}->object;
		if ($object->isa($blankClass) && $st->{'lex'} =~ /\~$/) {
			my $label = $label_for->($object, $pattern);
			$st->{'lex'} =~ s/\~$/$label/ if defined $label;
		}
		
		my $subject = $st->{'trine'}->subject;
		if ($subject->isa($blankClass) && $st->{'lex'} =~ /^\~/) {
			my $label = $label_for->($subject, $pattern);
			$st->{'lex'} =~ s/^\~/$label/ if defined $label;
		}
	};
	
	@statements = sort { $a->{'lex'} cmp $b->{'lex'} } @statements;
	
	# Only statements with a unique rendering may introduce canonical labels.
	foreach my $st (@statements) {
		next unless $lexCounts{ $st->{'lex'} } == 1;
		$relabel->($st, $blankNodePattern);
	}
	
	# Propagate existing labels everywhere else without creating new ones.
	foreach my $st (@statements) {
		$relabel->($st);
	}
	
	@statements = sort { $a->{'lex'} cmp $b->{'lex'} } @statements;
	
	my @canonicalStatements;
	my @otherStatements;
	foreach my $st (@statements) {
		unless ($st->{'lex'} =~ /(^\~)|(\~$)/) {
			push @canonicalStatements, $st;
			next;
		}
		
		croak "Model could not be canonicalised" if $onfail eq 'die';
		next if $onfail eq 'truncate';
		
		# 'append' / 'space': keep the statement with non-canonical labels.
		$relabel->($st, $hardNodePattern);
		push @otherStatements, $st;
	}
	
	my $rv = join('', map { $_->{'lex'} . " .\r\n" } @canonicalStatements);
	
	# The separator line is written whenever 'space' is requested, even if
	# no non-canonical statements follow.
	$rv .= "\r\n" if $onfail eq 'space';
	
	$rv .= join('', map { $_->{'lex'} . " .\r\n" } @otherStatements);

	return $rv;
}

1;
__END__