Skip to content

Instantly share code, notes, and snippets.

@kasei
Created February 19, 2015 22:01
Show Gist options
  • Save kasei/9b52ff6b7c0848d66714 to your computer and use it in GitHub Desktop.
Save kasei/9b52ff6b7c0848d66714 to your computer and use it in GitHub Desktop.
# Example of query algebra rewriting in Attean
use v5.14;
use autodie;
use utf8;
use RDF::Query;
use Attean;
use Attean::Algebra;
use Attean::Expression;
use Attean::TreeRewriter;
use AtteanX::RDFQueryTranslator;
# Pre-handler that splits every BGP holding more than one triple pattern into
# single-pattern BGPs, combined by a left-deep chain of binary joins:
#
#   BGP($a, $b, $c)   =>   Join(Join(BGP($a), BGP($b)), BGP($c))
#
# Use this as a template for other rewrites, e.g. partitioning the triple
# patterns into two groups, one to be placed inside a SERVICE block and the
# other wrapped in a new BGP to be executed locally.
#
# Arguments: the algebra node being visited; any further arguments passed by
# the rewriter (parent node, thunk) are ignored.
#
# Returns a three-element list:
#   (1, 0, $replacement)  when the node was a multi-triple BGP
#   (0, 1, $node)         otherwise, handing back the node untouched
# NOTE(review): these look like Attean::TreeRewriter's
# (handled, descend, rewritten) handler results -- confirm against its docs.
sub explode_bgps {
    my ($node) = @_;

    # Anything that is not a BGP is passed through unchanged.
    return (0, 1, $node) unless $node->isa('Attean::Algebra::BGP');

    # A BGP with zero or one triple pattern has nothing to split.
    my @patterns = @{ $node->triples };
    return (0, 1, $node) if scalar(@patterns) <= 1;

    # One BGP per triple pattern, preserving the original order.
    my ($joined, @remaining) =
        map { Attean::Algebra::BGP->new( triples => [$_] ) } @patterns;

    # Fold from the left so the result is Join(Join(b1, b2), b3) ...
    foreach my $bgp (@remaining) {
        $joined = Attean::Algebra::Join->new( children => [$joined, $bgp] );
    }

    return (1, 0, $joined);
}
# Pre-handler that pushes a filter down into a join:
#
#   Filter($expr, Join($a, $b, $c))   =>   Join($a, Filter($expr, $b), $c)
#
# The filter is moved onto the first join child (here $b) whose in-scope
# variables include every variable needed to evaluate $expr. If no child
# qualifies, the tree is left alone.
#
# Note: the n-ary Join notation above is only a convenience. The rebuilt join
# is a left-deep chain of binary joins, so Join($a, $b, $c) really means
# Join(Join($a, $b), $c). Only the direct children of the join under the
# filter are inspected; nested joins are not flattened first.
#
# Arguments: the algebra node being visited; any further arguments passed by
# the rewriter (parent node, thunk) are ignored.
#
# Returns a three-element list:
#   (1, 0, $replacement)  when the filter was pushed down
#   (0, 1, $node)         otherwise, handing back the node untouched
# NOTE(review): these look like Attean::TreeRewriter's
# (handled, descend, rewritten) handler results -- confirm against its docs.
sub pushdown_filters {
    my ($t) = @_;

    return (0, 1, $t) unless $t->isa('Attean::Algebra::Filter');

    my $expr      = $t->expression;
    my ($pattern) = @{ $t->children };
    return (0, 1, $t) unless $pattern->isa('Attean::Algebra::Join');

    my @children = @{ $pattern->children };
    my @needed   = $expr->in_scope_variables;

    foreach my $i (0 .. $#children) {
        my $child = $children[$i];

        # Plain hash lookup for the subset test. This avoids depending on
        # Set::Scalar, which this file never loads itself.
        my %bound = map { $_ => 1 } $child->in_scope_variables;
        next if grep { !$bound{$_} } @needed;

        warn "*** Filter can be pushed down to wrap: " . $child->as_string . "\n\n";

        # Swap the qualifying child for a filtered copy; siblings keep
        # their original positions.
        my @parts = @children;
        $parts[$i] = Attean::Algebra::Filter->new(
            children   => [$child],
            expression => $expr,
        );

        # Rebuild the join as a left-deep chain of binary joins.
        my $joined = shift(@parts);
        foreach my $part (@parts) {
            $joined = Attean::Algebra::Join->new( children => [$joined, $part] );
        }
        return (1, 0, $joined);
    }

    return (0, 1, $t);
}
# Example query: populated places with a total population below 50.
# The heredoc is non-interpolating so that SPARQL text (which may contain
# '$var' or '@') is never interpreted as Perl variables.
my $sparql = <<'END';
PREFIX dbo: <http://dbpedia.org/ontology/>
SELECT *
WHERE {
?place a dbo:PopulatedPlace .
?place dbo:populationTotal ?pop .
FILTER( (?pop < 50) ) .
}
END

# RDF::Query->new yields a false value when the query cannot be parsed; stop
# here with the parser's message rather than failing later on an undef value.
my $query = RDF::Query->new($sparql)
    or die "Failed to parse SPARQL query: " . (RDF::Query->error // 'unknown error') . "\n";

# Translate the RDF::Query object into an Attean algebra tree. The tree is
# kept in $algebra rather than $a, because a file-scoped lexical $a would
# shadow the package variable that sort/List::Util blocks rely on.
my $translator = AtteanX::RDFQueryTranslator->new();
my $algebra    = $translator->translate_query($query);
print "Original algebra before rewriting:\n";
print $algebra->as_string . "\n\n";

# Pass 1: split multi-triple BGPs into joins of single-triple BGPs. Whether
# anything changed is irrelevant here, so the first return value is dropped.
{
    my $rewriter = Attean::TreeRewriter->new(types => ['Attean::API::Algebra', 'Attean::API::Binding']);
    $rewriter->register_pre_handler(\&explode_bgps);
    (undef, $algebra) = $rewriter->rewrite($algebra, {});
}

# Pass 2: push filters down into the joins produced by pass 1 and report the
# outcome.
{
    my $rewriter = Attean::TreeRewriter->new(types => ['Attean::API::Algebra', 'Attean::API::Binding']);
    $rewriter->register_pre_handler(\&pushdown_filters);
    my ($changed, $rewritten) = $rewriter->rewrite($algebra, {});
    if ($changed) {
        print "Rewritten after filter pushdown:\n" . $rewritten->as_string;
    } else {
        warn "No rewriting occurred when trying to push down filters.\n";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment