Skip to content

Instantly share code, notes, and snippets.

@kasei
Created December 12, 2015 03:18
Show Gist options
  • Save kasei/f04cd79df91055754f3c to your computer and use it in GitHub Desktop.
Save kasei/f04cd79df91055754f3c to your computer and use it in GitHub Desktop.
Query planning with join rotation and coalescing
use v5.14;
use Test::Modern;
use Attean;
use Attean::RDF;
use Attean::IDPQueryPlanner;
package AtteanX::API::JoinRotatingPlanner {
# Rotate joins like (A⋈B)⋈C to A⋈(B⋈C), with the ability to coalesce B⋈C (e.g. for adjacent BGPs)
use Moo::Role;
use namespace::clean;
requires 'coalesce';
sub coalesce {
my $self = shift;
my $plan = shift;
return $plan;
}
around 'join_plans' => sub {
my $orig = shift;
my $self = shift;
my $model = shift;
my $active_graphs = shift;
my $default_graphs = shift;
my $lplans = shift;
my $rplans = shift;
my $type = shift;
my @plans = $orig->($self, $model, $active_graphs, $default_graphs, $lplans, $rplans, $type, @_);
if ($type eq 'inner') {
my @rotated;
foreach my $p (@plans) {
my ($lhs, $rhs) = @{ $p->children };
if ($lhs->does('Attean::API::Plan::Join')) {
my ($a, $b) = @{ $lhs->children };
my $c = $rhs;
# (A⋈B)⋈C -> A⋈(B⋈C)
foreach my $q ($orig->($self, $model, $active_graphs, $default_graphs, [$b], [$c], $type, @_)) {
push(@rotated, $orig->($self, $model, $active_graphs, $default_graphs, [$a], [$self->coalesce($q)], $type, @_))
}
} elsif ($rhs->does('Attean::API::Plan::Join')) {
my $a = $lhs;
my ($b, $c) = @{ $rhs->children };
# A⋈(B⋈C) -> (A⋈B)⋈C
foreach my $q ($orig->($self, $model, $active_graphs, $default_graphs, [$a], [$b], $type, @_)) {
push(@rotated, $orig->($self, $model, $active_graphs, $default_graphs, [$self->coalesce($q)], [$c], $type, @_));
}
}
push(@rotated, $p);
}
return @rotated;
} else {
return @plans;
}
};
}
###############################################################################
package MyBGP {
use Moo;
use Scalar::Util qw(blessed reftype);
use Types::Standard qw(ConsumerOf ArrayRef);
use namespace::clean;
with 'Attean::API::NullaryQueryTree', 'Attean::API::UnionScopeVariablesPlan';
sub plan_as_string { return 'BGP' }
sub impl { die "Unimplemented" }
}
package MyPlanner {
use Moo;
use namespace::clean;
extends 'Attean::IDPQueryPlanner';
with 'AtteanX::API::JoinRotatingPlanner';
sub coalesce {
my $self = shift;
my $p = shift;
my ($lhs, $rhs) = @{ $p->children };
if ($lhs->isa('Attean::Plan::Quad') and $rhs->isa('Attean::Plan::Quad')) {
return MyBGP->new(children => [$lhs, $rhs], distinct => 0);
}
return $p;
}
around 'cost_for_plan' => sub {
my $orig = shift;
my $self = shift;
my $plan = shift;
if ($plan->isa('MyBGP')) {
return 1;
}
return $orig->($self, $plan, @_);
}
}
package MyTestStore {
use Moo;
use namespace::clean;
extends 'AtteanX::Store::Memory';
sub cost_for_plan {
# we do this because the superclass would return a cost of 0 for quads when the store is empty
# and if 0 was returned, there won't be any meaningful difference between the cost of different join algorithms
my $self = shift;
my $plan = shift;
if ($plan->isa('Attean::Plan::Quad')) {
return 3;
}
return;
}
}
###############################################################################
{
my $store = MyTestStore->new();
my $model = Attean::MutableQuadModel->new( store => $store );
my $graph = iri('http://example.org/');
# my $t = triple(variable('s'), iri('p'), literal('1'));
my $t = triple(variable('s'), iri('p'), variable('o'));
my $v = triple(variable('s'), iri('q'), literal('xyz'));
my $w = triple(variable('o'), iri('b'), iri('c'));
my $bgp1 = Attean::Algebra::BGP->new(triples => [$t]);
my $bgp2 = Attean::Algebra::BGP->new(triples => [$w]);
my $service = Attean::Algebra::Service->new(children => [$bgp2], endpoint => iri('http://endpoint.example.org/sparql'));
my $bgp3 = Attean::Algebra::BGP->new(triples => [$v]);
my $join1 = Attean::Algebra::Join->new(children => [$bgp1, $service]);
# (t ⋈ Service(w)) ⋈ v
my $join2 = Attean::Algebra::Join->new(children => [$join1, $bgp3]);
subtest 'before BGP merging' => sub {
my $p = Attean::IDPQueryPlanner->new();
my $plan = $p->plan_for_algebra($join2, $model, [$graph]);
# warn $plan->as_string;
# A possible plan for this algebra:
# - Hash Join { s }
# - Quad { ?s, <q>, "xyz", <http://example.org/> } (distinct)
# - Hash Join { o }
# - Service <http://endpoint.example.org/sparql> SELECT * WHERE { { ?o <b> <c> . } }
# - Quad { ?s, <p>, ?o, <http://example.org/> } (distinct)
does_ok($plan, 'Attean::API::Plan::Join');
my ($lhs, $rhs) = @{ $plan->children };
my $join;
if ($lhs->does('Attean::API::Plan::Join')) {
does_ok($lhs, 'Attean::API::Plan::Join');
isa_ok($rhs, 'Attean::Plan::Quad');
$join = $lhs;
} else {
does_ok($rhs, 'Attean::API::Plan::Join');
isa_ok($lhs, 'Attean::Plan::Quad');
$join = $rhs;
}
my ($join_lhs, $join_rhs) = @{ $join->children };
if ($join_lhs->isa('Attean::Plan::Quad')) {
isa_ok($join_lhs, 'Attean::Plan::Quad');
isa_ok($join_rhs, 'Attean::Plan::Service');
} else {
isa_ok($join_rhs, 'Attean::Plan::Quad');
isa_ok($join_lhs, 'Attean::Plan::Service');
}
};
subtest 'after BGP merging' => sub {
# (t ⋈ Service(w)) ⋈ v
# should yield one of the following after rewriting:
# - BGP(tv) ⋈ Service(w)
# - Service(w) ⋈ BGP(tv)
my $p = MyPlanner->new();
my $plan = $p->plan_for_algebra($join2, $model, [$graph]);
# warn $plan->as_string;
# A possible plan for this algebra:
# - NestedLoop Join
# - Service <http://endpoint.example.org/sparql> SELECT * WHERE { { ?o <b> <c> . } }
# - BGP
# - Quad { ?s, <p>, ?o, <http://example.org/> } (distinct)
# - Quad { ?s, <q>, "xyz", <http://example.org/> } (distinct)
does_ok($plan, 'Attean::API::Plan::Join');
my ($lhs, $rhs) = @{ $plan->children };
if ($lhs->isa('MyBGP')) {
isa_ok($lhs, 'MyBGP');
isa_ok($rhs, 'Attean::Plan::Service');
} else {
isa_ok($rhs, 'MyBGP');
isa_ok($lhs, 'Attean::Plan::Service');
}
};
}
done_testing();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment