| RDF-Query documentation | Contained in the RDF-Query distribution. |
RDF::Query::BGPOptimizer - Optimizer for ordering the joins of triple patterns in a BGP
This document describes RDF::Query::BGPOptimizer version 2.907.
ordered_triples ( $context, @triples )Returns a list of triples, ordered so as to optimize a left-deep join plan based on the frequency counts provided by the underlying model.
Gregory Todd Williams <gwilliams@cpan.org>
| RDF-Query documentation | Contained in the RDF-Query distribution. |
# RDF::Query::BGPOptimizer # -----------------------------------------------------------------------------
package RDF::Query::BGPOptimizer; use strict; use warnings; use Data::Dumper; use List::Util qw(reduce); use Scalar::Util qw(blessed reftype refaddr); use RDF::Query::Error qw(:try); ###################################################################### our ($VERSION); BEGIN { $VERSION = '2.907'; } ######################################################################
sub ordered_triples { my $self = shift; my $context = shift; my @triples = @_; my $model = $context->model; my %vars; my %seen; my @weighted = map { my $triple = RDF::Query::Plan::Triple->new( $_->nodes ); [ $_, $self->_cost( $triple, $context ) ] } @triples; my %triples = map { refaddr($_->[0]) => $_ } @weighted; my @ordered = sort { $a->[1] <=> $b->[1] } @weighted; foreach my $t (@triples) { my @vars = $self->_triple_vars( $t ); foreach my $name (@vars) { push( @{ $vars{ $name } }, $t ) unless ($seen{ $name }{ refaddr($t) }++); } } my %edges; foreach my $name (keys %vars) { my @triples = @{ $vars{ $name } }; foreach my $t (@triples) { my $ta = refaddr($t); foreach my $u (@triples) { my $ua = refaddr($u); next if ($ta == $ua); $edges{ $ta }{ $ua } = $u; } } } my @final; my %used; my $start = shift(@ordered); $used{ refaddr($start) }++; push(@final, $start); my @seen = refaddr($start->[0]); my $count = 0; while (@ordered) { if (++$count > scalar(@triples)) { die "loop in BGPOptimizer (?)"; } my @frontier = grep { not($used{refaddr($_)}) } map { $triples{ $_ } } map { keys(%{ $edges{ $_ } }) } @seen; my @orderedf = sort { $a->[1] <=> $b->[1] } @frontier; if (@orderedf) { my $next = shift(@orderedf); my $addr = refaddr($next); $used{ $addr }++; push(@final, $next); push(@seen, refaddr($next->[0])); @ordered = grep { refaddr($_) != $addr } @ordered; } else { my $next = shift(@ordered); my $addr = refaddr($next); $used{ $addr }++; push(@final, $next); push(@seen, refaddr($next->[0])); } } return map { $_->[0] } @final; } sub _cost { my $self = shift; my $pattern = shift; my $context = shift; my $l = Log::Log4perl->get_logger("rdf.query.bgpoptimizer"); my $bf = $pattern->bf( $context ); my $f = ($bf =~ tr/f//); my $r = $f / 3; $l->debug( "Pattern has bf representation '$bf'" ); $l->debug( "There are $f of 3 free variables" ); $l->debug( 'Estimated cardinality of triple is : ' . $r ); # round the cardinality to an integer return int($r + .5 * ($r <=> 0)); } sub _triple_vars { my $self = shift; my $t = shift; my @nodes = $t->nodes; my (@vars, %seen); foreach my $n (@nodes) { if ($n->isa('RDF::Trine::Node::Variable')) { my $name = $n->name; push(@vars, $name) unless ($seen{ $name }++); } } return @vars; } 1; __END__