RDF::Trine::Parser - RDF Parser class


RDF-Trine documentation Contained in the RDF-Trine distribution.

Index


Code Index:

NAME

Top

RDF::Trine::Parser - RDF Parser class

VERSION

Top

This document describes RDF::Trine::Parser version 0.135

SYNOPSIS

Top

 use RDF::Trine::Parser;

 RDF::Trine::Parser->parse_url_into_model( $url, $model );

 my $parser	= RDF::Trine::Parser->new( 'turtle' );
 $parser->parse_into_model( $base_uri, $rdf, $model );

 $parser->parse_file_into_model( $base_uri, 'data.ttl', $model );

DESCRIPTION

Top

RDF::Trine::Parser is a base class for RDF parsers. It may be used as a factory class for constructing parser objects by name or media type with the new method, or used to abstract away the logic of choosing a parser based on the media type of RDF content retrieved over the network with the parse_url_into_model method.

METHODS

Top

media_type

Returns the canonical media type associated with this parser.

media_types

Returns the media types associated with this parser.

parser_by_media_type ( $media_type )

Returns the parser class appropriate for parsing content of the specified media type.

guess_parser_by_filename ( $filename )

Returns the best-guess parser class to parse a file with the given filename.

new ( $parser_name, @args )

Returns a new RDF::Trine::Parser object for the parser with the specified name (e.g. "rdfxml" or "turtle"). If no parser with the specified name is found, throws an RDF::Trine::Error::ParserError exception.

Any @args will be passed through to the format-specific parser constructor.

If @args contains the key-value pair (canonicalize => 1), literal value canonicalization will be attempted during parsing with warnings being emitted for invalid lexical forms for recognized datatypes.

parse_url_into_model ( $url, $model [, %args] )

Retrieves the content from $url and attempts to parse the resulting RDF into $model using a parser chosen by the associated content media type.

parse_into_model ( $base_uri, $data, $model [, context => $context] )

Parses the $data, using the given $base_uri. For each RDF statement parsed, will call $model->add_statement( $statement ).

parse_file_into_model ( $base_uri, $fh, $model [, context => $context] )

Parses all data read from the filehandle or file $fh, using the given $base_uri. For each RDF statement parsed, will call $model->add_statement( $statement ).

parse_file ( $base_uri, $fh, $handler )

Parses all data read from the filehandle or file $fh, using the given $base_uri. If $fh is a filename, this method can guess the associated parser. For each RDF statement parsed, $handler is called.

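parse ( $base_uri, $rdf, \&handler )

Parses the $rdf data, using the given $base_uri. For each RDF statement parsed, \&handler is called with the statement as its argument.
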
parse_into_model ( $base_uri, $data, $model )

AUTHOR

Top

Gregory Todd Williams <gwilliams@cpan.org>

COPYRIGHT

Top


RDF-Trine documentation Contained in the RDF-Trine distribution.
# RDF::Trine::Parser
# -----------------------------------------------------------------------------

package RDF::Trine::Parser;

use strict;
use warnings;
no warnings 'redefine';
use Data::Dumper;
use Encode qw(decode);
use LWP::MediaTypes;

# Module version; assigned in the BEGIN block below so it is set at compile time.
our ($VERSION);
# File extension (without the dot) => parser class; consulted by
# guess_parser_by_filename.
our %file_extensions;
# Normalized parser name (lowercased, non-letters stripped) => parser class;
# consulted by new().
our %parser_names;
# Parser class name => canonical media type; consulted by media_type.
our %canonical_media_types;
# Media type string => parser class; consulted by media_types,
# parser_by_media_type and parse_url_into_model.
our %media_types;
# Not referenced by the code visible in this part of the file.
# NOTE(review): presumably format URI => parser class -- confirm against the
# code that fills it.
our %format_uris;
# Parser class name => encoding name handed to Encode::decode before the
# fetched content is parsed (see parse_url_into_model).
our %encodings;
# NOTE(review): none of these registries is populated in the visible code;
# presumably the format-specific parser modules fill them -- TODO confirm.
BEGIN {
	$VERSION	= '0.135';
}

use Scalar::Util qw(blessed);
use LWP::UserAgent;

use RDF::Trine::Error qw(:try);
use RDF::Trine::Parser::NTriples;
use RDF::Trine::Parser::NQuads;
use RDF::Trine::Parser::Turtle;
use RDF::Trine::Parser::TriG;
use RDF::Trine::Parser::RDFXML;
use RDF::Trine::Parser::RDFJSON;
use RDF::Trine::Parser::RDFa;

# media_type
#
# Returns the canonical media type registered for this parser in
# %canonical_media_types, or undef if none is registered. May be called on
# either a parser class or a parser object; an object is reduced to its class
# name before the lookup.
sub media_type {
	my ($invocant)	= @_;
	my $parser_class	= ref($invocant) ? ref($invocant) : $invocant;
	return $canonical_media_types{ $parser_class };
}

# media_types
#
# Returns the list of media types associated with this parser: every key of
# %media_types whose registered parser class the invocant isa.
sub media_types {
	my ($self)	= @_;
	my @types	= grep { $self->isa( $media_types{ $_ } ) } keys %media_types;
	return @types;
}

# parser_by_media_type ( $media_type )
#
# Returns the parser class registered in %media_types for $media_type, or
# undef when no parser is registered for it. The invocant is not used.
sub parser_by_media_type {
	my (undef, $type)	= @_;
	return $media_types{ $type };
}

# guess_parser_by_filename ( $filename )
#
# Returns the best-guess parser class for a file with the given name. The
# trailing ".ext" of the name is looked up in %file_extensions; when there is
# no extension, or it is not registered, the parser registered for
# application/rdf+xml is returned, defaulting to RDF::Trine::Parser::RDFXML.
sub guess_parser_by_filename {
	my ($class, $file)	= @_;

	my ($suffix)	= ($file =~ m/[.](\w+)$/);
	if (defined($suffix) and exists $file_extensions{ $suffix }) {
		return $file_extensions{ $suffix };
	}

	my $rdfxml	= $class->parser_by_media_type( 'application/rdf+xml' );
	return $rdfxml ? $rdfxml : 'RDF::Trine::Parser::RDFXML';
}

# new ( $parser_name, @args )
#
# Factory constructor. Normalizes $parser_name (lowercase, everything except
# a-z removed), looks the result up in %parser_names and returns a new object
# of the registered parser class. Any @args are passed through to that
# class's constructor.
#
# Throws RDF::Trine::Error::UnimplementedError for the name 'guess', and
# RDF::Trine::Error::ParserError when no parser is registered under the name.
sub new {
	my ($class, $name, @args)	= @_;

	(my $key = lc($name))	=~ s/[^a-z]//g;

	if ($name eq 'guess') {
		RDF::Trine::Error::UnimplementedError->throw( -text => "guess parser heuristics are not implemented yet" );
	}

	my $parser_class	= $parser_names{ $key };
	unless ($parser_class) {
		RDF::Trine::Error::ParserError->throw( -text => "No parser known named $name" );
	}

	# re-add name for multiformat (e.g. Redland) parsers
	return $parser_class->new( name => $key, @args );
}

# parse_url_into_model ( $url, $model [, %args] )
#
# Class method. Fetches $url with LWP::UserAgent (advertising every media type
# registered in %media_types in the Accept header, preferring Turtle and
# RDF/XML) and parses the response body into $model.
#
# Parser selection:
#   1. If a parser class is registered in %media_types for the response's
#      media type, that parser is used; any error it throws propagates.
#   2. Otherwise the fallback heuristics run: the URL's file extension and
#      simple content sniffing choose a parser, and as a last resort every
#      registered parser class is tried in turn until one succeeds.
#
# %args is passed through to parse_into_model (e.g. context => $context). If
# $args{canonicalize} is defined it is also passed to the parser constructor.
#
# Returns 1 on success. Throws RDF::Trine::Error::ParserError if the request
# fails or if no parser is able to parse the content.
sub parse_url_into_model {
	my $class	= shift;
	my $url		= shift;
	my $model	= shift;
	my %args	= @_;
	
	my $ua		= LWP::UserAgent->new( agent => "RDF::Trine/$RDF::Trine::VERSION" );
	
	# prefer RDF/XML or Turtle, then anything else that we've got a parser for.
	my $accept	= join(',', map { /(turtle|rdf[+]xml)/ ? "$_;q=1.0" : "$_;q=0.9" } keys %media_types);
	$ua->default_headers->push_header( 'Accept' => $accept );
	
	my $resp	= $ua->get( $url );
	if ($url =~ /^file:/) {
		# local files carry no server-supplied type, so derive one from the name
		my $type	= guess_media_type($url);
		$resp->header('Content-Type', $type);
	}
	
	unless ($resp->is_success) {
		RDF::Trine::Error::ParserError->throw( -text => $resp->status_line );
	}
	
	my $content	= $resp->content;
	my $type	= $resp->header('content-type');
	# a response without a Content-Type header must not trip the substitution
	$type		= '' unless (defined($type));
	# strip media type parameters such as "; charset=..."
	$type		=~ s/^([^\s;]+).*/$1/;
	
	# constructor options shared by the media type path and the fallbacks
	my %options;
	if (defined $args{canonicalize}) {
		$options{ canonicalize }	= $args{canonicalize};
	}
	
	my $pclass	= $media_types{ $type };
	if ($pclass and $pclass->can('new')) {
		my $data	= $content;
		if (my $e = $encodings{ $pclass }) {
			$data	= decode( $e, $content );
		}
		my $parser	= $pclass->new(%options);
		$parser->parse_into_model( $url, $data, $model, %args );
		return 1;
	}
	
	### FALLBACK
	# No parser is registered for the reported media type (or none was
	# reported), so guess from the URL's extension and the content itself.
	if ($url =~ /[.](x?rdf|owl)$/ or $content =~ m/\x{FEFF}?<[?]xml /smo) {
		my $parser	= RDF::Trine::Parser::RDFXML->new(%options);
		$parser->parse_into_model( $url, $content, $model, %args );
		return 1;
	} elsif ($url =~ /[.]ttl$/ or $content =~ m/@(prefix|base)/smo) {
		my $parser	= RDF::Trine::Parser::Turtle->new(%options);
		my $data	= decode('utf8', $content);
		$parser->parse_into_model( $url, $data, $model, %args );
		return 1;
	} elsif ($url =~ /[.]trig$/) {
		# the class loaded at the top of the file is ...::TriG, not ...::Trig
		my $parser	= RDF::Trine::Parser::TriG->new(%options);
		my $data	= decode('utf8', $content);
		$parser->parse_into_model( $url, $data, $model, %args );
		return 1;
	} elsif ($url =~ /[.]nt$/) {
		my $parser	= RDF::Trine::Parser::NTriples->new(%options);
		$parser->parse_into_model( $url, $content, $model, %args );
		return 1;
	} elsif ($url =~ /[.]nq$/) {
		my $parser	= RDF::Trine::Parser::NQuads->new(%options);
		$parser->parse_into_model( $url, $content, $model, %args );
		return 1;
	} elsif ($url =~ /[.]js(?:on)?$/) {
		my $parser	= RDF::Trine::Parser::RDFJSON->new(%options);
		$parser->parse_into_model( $url, $content, $model, %args );
		return 1;
	} elsif ($url =~ /[.]x?html?$/) {
		my $parser	= RDF::Trine::Parser::RDFa->new(%options);
		$parser->parse_into_model( $url, $content, $model, %args );
		return 1;
	} else {
		# last resort: try each distinct registered parser class in turn
		my @classes	= keys %{ { map { $_ => 1 } values %media_types } };
		foreach my $candidate (@classes) {
			my $data	= $content;
			if (my $e = $encodings{ $candidate }) {
				$data	= decode( $e, $content );
			}
			my $parser	= $candidate->new(%options);
			my $ok		= 0;
			try {
				$parser->parse_into_model( $url, $data, $model, %args );
				$ok	= 1;
			} catch RDF::Trine::Error::ParserError with {};
			return 1 if ($ok);
		}
	}
	RDF::Trine::Error::ParserError->throw( -text => "Failed to parse data from $url" );
}

# parse_into_model ( $base_uri, $data, $model [, context => $context] )
#
# Parses $data using the given $base_uri and calls
# $model->add_statement( $statement ) for each statement parsed. When a
# context is given, each statement is first wrapped in an
# RDF::Trine::Statement::Quad built from the statement's nodes and $context.
#
# May be called on a parser object, or on a parser class (in which case a
# parser is constructed with ->new()). $base_uri may be a string or an
# RDF::Trine::Node::Resource object.
#
# The parse is bracketed by $model->begin_bulk_ops / $model->end_bulk_ops.
# end_bulk_ops is called even when the parser throws, after which the original
# exception is rethrown unchanged.
#
# Returns whatever the parser's parse method returns.
sub parse_into_model {
	my $proto	= shift;
	my $self	= blessed($proto) ? $proto : $proto->new();
	my $uri		= shift;
	if (blessed($uri) and $uri->isa('RDF::Trine::Node::Resource')) {
		$uri	= $uri->uri_value;
	}
	my $input	= shift;
	my $model	= shift;
	my %args	= @_;
	my $context	= $args{'context'};
	
	my $handler	= sub {
		my $st	= shift;
		if ($context) {
			my $quad	= RDF::Trine::Statement::Quad->new( $st->nodes, $context );
			$model->add_statement( $quad );
		} else {
			$model->add_statement( $st );
		}
	};
	
	$model->begin_bulk_ops();
	my $s;
	my $ok		= eval { $s = $self->parse( $uri, $input, $handler ); 1 };
	# capture $@ right away; end_bulk_ops may run code that resets it
	my $error	= $@;
	$model->end_bulk_ops();
	unless ($ok) {
		# rethrow the original exception (object or string) as it was caught
		die( (ref($error) or length($error)) ? $error : "Unknown error while parsing\n" );
	}
	return $s;
}

# parse_file_into_model ( $base_uri, $fh, $model [, context => $context] )
#
# Parses all data read from the filehandle or file $fh using the given
# $base_uri, and calls $model->add_statement( $statement ) for each statement
# parsed. When a context is given, each statement is first wrapped in an
# RDF::Trine::Statement::Quad built from the statement's nodes and $context.
#
# May be called on a parser object, on this base class itself (the invocant
# is then passed to parse_file unchanged), or on another parser class (in
# which case a parser is constructed with ->new()). $base_uri may be a string
# or an RDF::Trine::Node::Resource object.
#
# The parse is bracketed by $model->begin_bulk_ops / $model->end_bulk_ops.
# end_bulk_ops is called even when parsing throws, after which the original
# exception is rethrown unchanged.
#
# Returns whatever parse_file returns.
sub parse_file_into_model {
	my $proto	= shift;
	my $self	= (blessed($proto) or $proto eq  __PACKAGE__)
			? $proto : $proto->new();
	my $uri		= shift;
	if (blessed($uri) and $uri->isa('RDF::Trine::Node::Resource')) {
		$uri	= $uri->uri_value;
	}
	my $fh		= shift;
	my $model	= shift;
	my %args	= @_;
	my $context	= $args{'context'};
	
	my $handler	= sub {
		my $st	= shift;
		if ($context) {
			my $quad	= RDF::Trine::Statement::Quad->new( $st->nodes, $context );
			$model->add_statement( $quad );
		} else {
			$model->add_statement( $st );
		}
	};
	
	$model->begin_bulk_ops();
	my $s;
	my $ok		= eval { $s = $self->parse_file( $uri, $fh, $handler ); 1 };
	# capture $@ right away; end_bulk_ops may run code that resets it
	my $error	= $@;
	$model->end_bulk_ops();
	unless ($ok) {
		# rethrow the original exception (object or string) as it was caught
		die( (ref($error) or length($error)) ? $error : "Unknown error while parsing\n" );
	}
	return $s;
}

# parse_file ( $base_uri, $fh, $handler )
#
# Parses all data read from $fh, which may be an open filehandle (any
# reference) or a filename (plain string), using the given $base_uri. The
# whole input is read into memory and passed to the parser's parse method
# together with $handler and any additional arguments.
#
# When given a filename and the invocant has no parse method, a parser class
# is guessed from the filename and instantiated. The file is opened for
# reading with the :utf8 layer.
#
# Returns whatever parse returns. Throws RDF::Trine::Error::ParserError if the
# file cannot be opened or if no usable parser is available.
sub parse_file {
	my ($self, $base, $source, $handler, @extra)	= @_;

	my $fh;
	if (ref($source)) {
		# already an open handle
		$fh	= $source;
	} else {
		# a plain string is a filename; pick a parser from it if we lack one
		if (not $self->can('parse')) {
			my $guessed	= $self->guess_parser_by_filename( $source );
			if ($guessed and $guessed->can('new')) {
				$self	= $guessed->new();
			}
		}
		open( $fh, '<:utf8', $source )
			or RDF::Trine::Error::ParserError->throw( -text => $! );
	}

	unless ($self and $self->can('parse')) {
		RDF::Trine::Error::ParserError->throw( -text => "Cannot parse unknown serialization" );
	}

	# slurp the handle; $/ is localized to this do-block only
	my $content	= do { local $/; <$fh> };
	return $self->parse( $base, $content, $handler, @extra );
}


1;

__END__