RDF::Trine::Node::Literal - RDF Node class for literals


RDF-Trine documentation Contained in the RDF-Trine distribution.

Index


Code Index:

NAME

Top

RDF::Trine::Node::Literal - RDF Node class for literals

VERSION

Top

This document describes RDF::Trine::Node::Literal version 0.135

METHODS

Top

Beyond the methods documented below, this class inherits methods from the RDF::Trine::Node class.

new ( $string, $lang, $datatype, $canonical_flag )

Returns a new Literal structure.

literal_value

Returns the string value of the literal.

literal_value_language

Returns the language tag of the ltieral.

literal_datatype

Returns the datatype of the literal.

value

Returns the literal value.

sse

Returns the SSE string for this literal.

as_string

Returns a string representation of the node.

as_ntriples

Returns the node in a string form suitable for NTriples serialization.

type

Returns the type string of this node.

has_language

Returns true if this literal is language-tagged, false otherwise.

has_datatype

Returns true if this literal is datatyped, false otherwise.

equal ( $node )

Returns true if the two nodes are equal, false otherwise.

canonicalize_literal_value ( $string, $datatype, $warn )

If $datatype is a recognized datatype, returns the canonical lexical representation of the value $string. Otherwise returns $string.

Currently, xsd:integer, xsd:decimal, and xsd:boolean are canonicalized. Additionaly, invalid lexical forms for xsd:float, xsd:double, and xsd:dateTime will trigger a warning.

is_valid_lexical_form

Returns true if the node is of a recognized datatype and has a valid lexical form for that datatype. If the lexical form is invalid, returns false. If the datatype is unrecognized, returns zero-but-true.

is_numeric_type

Returns true if the literal is a known (xsd) numeric type.

numeric_value

Returns the numeric value of the literal (even if the literal isn't a known numeric type.

AUTHOR

Top

Gregory Todd Williams <gwilliams@cpan.org>

COPYRIGHT

Top


RDF-Trine documentation Contained in the RDF-Trine distribution.
# RDF::Trine::Node::Literal
# -----------------------------------------------------------------------------

package RDF::Trine::Node::Literal;

use strict;
use warnings;
no warnings 'redefine';
use base qw(RDF::Trine::Node);

use RDF::Trine::Error;
use Data::Dumper;
use Scalar::Util qw(blessed);
use Carp qw(carp croak confess);

######################################################################

our ($VERSION, $USE_XMLLITERALS, $USE_FORMULAE);
BEGIN {
	$VERSION	= '0.135';
	eval "use RDF::Trine::Node::Literal::XML;";
	$USE_XMLLITERALS	= (RDF::Trine::Node::Literal::XML->can('new')) ? 1 : 0;
	eval "use RDF::Trine::Node::Formula;";
	$USE_FORMULAE = (RDF::Trine::Node::Formula->can('new')) ? 1 : 0;
}

######################################################################

use overload	'""'	=> sub { $_[0]->sse },
			;

sub new {
	my $class	= shift;
	my $literal	= shift;
	my $lang	= shift;
	my $dt		= shift;
	my $canon	= shift;
	
	if (blessed($dt) and $dt->isa('RDF::Trine::Node::Resource')) {
		$dt	= $dt->uri_value;
	}
	
	if ($dt and $canon) {
		$literal	= $class->canonicalize_literal_value( $literal, $dt );
	}
	
	if ($USE_XMLLITERALS and defined($dt) and $dt eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral') {
		return RDF::Trine::Node::Literal::XML->new( $literal, $lang, $dt );
	} elsif ($USE_FORMULAE and defined($dt) and $dt eq RDF::Trine::Node::Formula->literal_datatype) {
		return RDF::Trine::Node::Formula->new( $literal );
	} else {
		return $class->_new( $literal, $lang, $dt );
	}
}

sub _new {
	my $class	= shift;
	my $literal	= shift;
	my $lang	= shift;
	my $dt		= shift;
	my $self;

	if ($lang and $dt) {
		throw RDF::Trine::Error::MethodInvocationError ( -text => "Literal values cannot have both language and datatype" );
	}
	
	if ($lang) {
		$self	= [ $literal, lc($lang), undef ];
	} elsif ($dt) {
		if (blessed($dt)) {
			$dt	= $dt->uri_value;
		}
		$self	= [ $literal, undef, $dt ];
	} else {
		$self	= [ $literal ];
	}
	return bless($self, $class);
}

sub literal_value {
	my $self	= shift;
	if (@_) {
		$self->[0]	= shift;
	}
	return $self->[0];
}

sub literal_value_language {
	my $self	= shift;
	return $self->[1];
}

sub literal_datatype {
	my $self	= shift;
	return $self->[2];
}

sub value {
	my $self	= shift;
	return $self->literal_value;
}

sub sse {
	my $self	= shift;
	my $literal	= $self->literal_value;
	my $escaped	= $self->_unicode_escape( $literal );
	$literal	= $escaped;
	if (defined(my $lang = $self->literal_value_language)) {
		return qq("${literal}"\@${lang});
	} elsif (defined(my $dt = $self->literal_datatype)) {
		return qq("${literal}"^^<${dt}>);
	} else {
		return qq("${literal}");
	}
}

sub as_string {
	my $self	= shift;
	my $string	= '"' . $self->literal_value . '"';
	if (defined(my $dt = $self->literal_datatype)) {
		$string	.= '^^<' . $dt . '>';
	} elsif (defined(my $lang = $self->literal_value_language)) {
		$string	.= '@' . $lang;
	}
	return $string;
}

sub as_ntriples {
	my $self	= shift;
	my $literal	= $self->literal_value;
	my $escaped	= $self->_unicode_escape( $literal );
	$literal	= $escaped;
	if (defined(my $lang = $self->literal_value_language)) {
		return qq("${literal}"\@${lang});
	} elsif (defined(my $dt = $self->literal_datatype)) {
		return qq("${literal}"^^<${dt}>);
	} else {
		return qq("${literal}");
	}
}

sub type {
	return 'LITERAL';
}

sub has_language {
	my $self	= shift;
	return defined($self->literal_value_language) ? 1 : 0;
}

sub has_datatype {
	my $self	= shift;
	return defined($self->literal_datatype) ? 1 : 0;
}

sub equal {
	my $self	= shift;
	my $node	= shift;
	return 0 unless (blessed($node) and $node->isa('RDF::Trine::Node::Literal'));
	return 0 unless ($self->literal_value eq $node->literal_value);
	if ($self->literal_datatype or $node->literal_datatype) {
		no warnings 'uninitialized';
		return 0 unless ($self->literal_datatype eq $node->literal_datatype);
	}
	if ($self->literal_value_language or $node->literal_value_language) {
		no warnings 'uninitialized';
		return 0 unless ($self->literal_value_language eq $node->literal_value_language);
	}
	return 1;
}

# called to compare two nodes of the same type
sub _compare {
	my $a	= shift;
	my $b	= shift;
	if ($a->literal_value ne $b->literal_value) {
		return ($a->literal_value cmp $b->literal_value);
	}
	
	# the nodes have the same lexical value
	if ($a->has_language and $b->has_language) {
		return ($a->literal_value_language cmp $b->literal_value_language);
	}
	
	if ($a->has_datatype and $b->has_datatype) {
		return ($a->literal_datatype cmp $b->literal_datatype);
	} elsif ($a->has_datatype) {
		return 1;
	} elsif ($b->has_datatype) {
		return -1;
	}
	
	return 0;
}

sub canonicalize_literal_value {
	my $self	= shift;
	my $value	= shift;
	my $dt		= shift;
	my $warn	= shift;
	
	if ($dt eq 'http://www.w3.org/2001/XMLSchema#integer') {
		if ($value =~ m/^([-+])?(\d+)$/) {
			my $sign	= $1 || '';
			my $num		= $2;
			$sign		= '' if ($sign eq '+');
			$num		=~ s/^0+(\d)/$1/;
			return "${sign}${num}";
		} else {
			warn "Bad lexical form for xsd:integer: '$value'" if ($warn);
		}
	} elsif ($dt eq 'http://www.w3.org/2001/XMLSchema#decimal') {
		if ($value =~ m/^([-+])?((\d+)([.]\d*)?)$/) {
			my $sign	= $1 || '';
			my $num		= $2;
			my $int		= $3;
			my $frac	= $4;
			$sign		= '' if ($sign eq '+');
			$num		=~ s/^0+(.)/$1/;
			$num		=~ s/[.](\d)0+$/.$1/;
			if ($num =~ /^[.]/) {
				$num	= "0$num";
			}
			if ($num !~ /[.]/) {
				$num	= "${num}.0";
			}
			return "${sign}${num}";
		} elsif ($value =~ m/^([-+])?([.]\d+)$/) {
			my $sign	= $1 || '';
			my $num		= $2;
			$sign		= '' if ($sign eq '+');
			$num		=~ s/^0+(.)/$1/;
			return "${sign}${num}";
		} else {
			warn "Bad lexical form for xsd:deciaml: '$value'" if ($warn);
			$value		= sprintf('%f', $value);
		}
	} elsif ($dt eq 'http://www.w3.org/2001/XMLSchema#float') {
		if ($value =~ m/^(?:([-+])?(?:(\d+(?:\.\d*)?|\.\d+)([Ee][-+]?\d+)?|(INF)))|(NaN)$/) {
			my $sign	= $1;
			my $inf		= $4;
			my $nan		= $5;
			no warnings 'uninitialized';
			$sign		= '' if ($sign eq '+');
			return "${sign}$inf" if ($inf);
			return $nan if ($nan);

			$value		= sprintf('%E', $value);
			$value 		=~ m/^(?:([-+])?(?:(\d+(?:\.\d*)?|\.\d+)([Ee][-+]?\d+)?|(INF)))|(NaN)$/;
			$sign		= $1;
			$inf		= $4;
			$nan		= $5;
			my $num		= $2;
			my $exp		= $3;
			$num		=~ s/[.](\d+?)0+/.$1/;
			$exp	=~ tr/e/E/;
			$exp	=~ s/E[+]/E/;
			$exp	=~ s/E(-?)0+([1-9])$/E$1$2/;
			$exp	=~ s/E(-?)0+$/E${1}0/;
			return "${sign}${num}${exp}";
		} else {
			warn "Bad lexical form for xsd:float: '$value'" if ($warn);
			$value	= sprintf('%E', $value);
			$value	=~ s/E[+]/E/;
			$value	=~ s/E0+(\d)/E$1/;
			$value	=~ s/(\d)0+E/$1E/;
		}
	} elsif ($dt eq 'http://www.w3.org/2001/XMLSchema#double') {
		if ($value =~ m/^(?:([-+])?(?:(\d+(?:\.\d*)?|\.\d+)([Ee][-+]?\d+)?|(INF)))|(NaN)$/) {
			my $sign	= $1;
			my $inf		= $4;
			my $nan		= $5;
			no warnings 'uninitialized';
			$sign		= '' if ($sign eq '+');
			return "${sign}$inf" if ($inf);
			return $nan if ($nan);

			$value		= sprintf('%E', $value);
			$value 		=~ m/^(?:([-+])?(?:(\d+(?:\.\d*)?|\.\d+)([Ee][-+]?\d+)?|(INF)))|(NaN)$/;
			$sign		= $1;
			$inf		= $4;
			$nan		= $5;
			my $num		= $2;
			my $exp		= $3;
			$num		=~ s/[.](\d+?)0+/.$1/;
			$exp	=~ tr/e/E/;
			$exp	=~ s/E[+]/E/;
			$exp	=~ s/E(-?)0+([1-9])$/E$1$2/;
			$exp	=~ s/E(-?)0+$/E${1}0/;
			return "${sign}${num}${exp}";
		} else {
			warn "Bad lexical form for xsd:double: '$value'" if ($warn);
			$value	= sprintf('%E', $value);
			$value	=~ s/E[+]/E/;
			$value	=~ s/E0+(\d)/E$1/;
			$value	=~ s/(\d)0+E/$1E/;
		}
	} elsif ($dt eq 'http://www.w3.org/2001/XMLSchema#boolean') {
		if ($value =~ m/^(true|false|0|1)$/) {
			$value	= 'true' if ($value eq '1');
			$value	= 'false' if ($value eq '0');
			return $value;
		} else {
			warn "Bad lexical form for xsd:boolean: '$value'" if ($warn);
		}
	} elsif ($dt eq 'http://www.w3.org/2001/XMLSchema#dateTime') {
		if ($value =~ m/^-?([1-9]\d{3,}|0\d{3})-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])T(([01]\d|2[0-3]):[0-5]\d:[0-5]\d(\.\d+)?|(24:00:00(\.0+)?))(Z|(\+|-)((0\d|1[0-3]):[0-5]\d|14:00))?$/) {
			# XXX need to canonicalize the dateTime
			return $value;
		} else {
			warn "Bad lexical form for xsd:boolean: '$value'" if ($warn);
		}
	}
	return $value;
}

sub is_valid_lexical_form {
	my $self	= shift;
	my $value	= $self->literal_value;
	my $dt		= $self->literal_datatype;
	
	unless ($dt =~ qr<^http://www.w3.org/2001/XMLSchema#(integer|decimal|float|double|boolean|dateTime|non(Positive|Negative)Integer|(positive|negative)Integer|long|int|short|byte|unsigned(Long|Int|Short|Byte))>) {
		return '0E0';	# zero but true (it's probably ok, but we don't recognize the datatype)
	}
	
	if ($dt =~ m<http://www.w3.org/2001/XMLSchema#(integer|non(Positive|Negative)Integer|(positive|negative)Integer|long|int|short|byte|unsigned(Long|Int|Short|Byte))>) {
		if ($value =~ m/^([-+])?(\d+)$/) {
			return 1;
		} else {
			return 0;
		}
	} elsif ($dt eq 'http://www.w3.org/2001/XMLSchema#decimal') {
		if ($value =~ m/^([-+])?((\d+)([.]\d*)?)$/) {
			return 1;
		} elsif ($value =~ m/^([-+])?([.]\d+)$/) {
			return 1;
		} else {
			return 0;
		}
	} elsif ($dt eq 'http://www.w3.org/2001/XMLSchema#float') {
		if ($value =~ m/^[-+]?(\d+(\.\d*)?|\.\d+)([Ee][-+]?\d+)?|[-+]?INF|NaN$/) {
			return 1;
		} else {
			return 0;
		}
	} elsif ($dt eq 'http://www.w3.org/2001/XMLSchema#double') {
		if ($value =~ m/^[-+]?((\d+(\.\d*)?)|(\.\d+))([Ee][-+]?\d+)?|[-+]?INF|NaN$/) {
			return 1;
		} else {
			return 0;
		}
	} elsif ($dt eq 'http://www.w3.org/2001/XMLSchema#boolean') {
		if ($value =~ m/^(true|false|0|1)$/) {
			return 1;
		} else {
			return 0;
		}
	} elsif ($dt eq 'http://www.w3.org/2001/XMLSchema#dateTime') {
		if ($value =~ m/^-?([1-9]\d{3,}|0\d{3})-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])T(([01]\d|2[0-3]):[0-5]\d:[0-5]\d(\.\d+)?|(24:00:00(\.0+)?))(Z|(\+|-)((0\d|1[0-3]):[0-5]\d|14:00))?$/) {
			return 1;
		} else {
			return 0;
		}
	}
	return 0;
}

sub is_numeric_type {
	my $self	= shift;
	return 0 unless ($self->has_datatype);
	my $type	= $self->literal_datatype;
	if ($type =~ qr<^http://www.w3.org/2001/XMLSchema#(integer|decimal|float|double|non(Positive|Negative)Integer|(positive|negative)Integer|long|int|short|byte|unsigned(Long|Int|Short|Byte))>) {
		return 1;
	} else {
		return 0;
	}
}

sub numeric_value {
	my $self	= shift;
	if ($self->is_numeric_type) {
		my $value	= $self->literal_value;
		if (looks_like_number($value)) {
			my $v	= 0 + eval "$value";
			return $v;
		} else {
			throw RDF::Query::Error::TypeError -text => "Literal with numeric type does not appear to have numeric value.";
		}
	} elsif (not $self->has_datatype) {
		if (looks_like_number($self->literal_value)) {
			return 0+$self->literal_value;
		} else {
			return;
		}
	} elsif ($self->literal_datatype eq 'http://www.w3.org/2001/XMLSchema#boolean') {
		return ($self->literal_value eq 'true') ? 1 : 0;
	} else {
		return;
	}
}

1;

__END__