Created
August 7, 2022 00:26
-
-
Save kasei/580f6b407915d5b091531c853dfc08a8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl

=head1 NAME

attean-sparql-list-operators.pl - Example of using list functions and a new EXPLODE operator in SPARQL

=head1 DESCRIPTION

New extension functions operating over literals with datatype C<ex:List>:

=over 4

=item C<< ex:split(xsd:string, xsd:string) -> ex:List >>

=item C<< ex:zip(ex:List, ex:List) -> ex:List >>

=item C<< ex:listGet(ex:List, xsd:integer) -> RDFTerm >>

=back

A new C<EXPLODE> operator which syntactically mirrors C<BIND>, but which
produces any number of results:

=over 4

=item C<< EXPLODE(expr AS ?var) >>

expr evaluating to ex:List, produces one result for each element of the
encoded list

=back

=cut

use v5.14;
use warnings;
use autodie;
use utf8;

use Attean;
use Attean::RDF;
use Attean::SimpleQueryEvaluator;
use AtteanX::Functions::CompositeLists;

# Enable the extension functions
AtteanX::Functions::CompositeLists->register();

# The query is kept in a non-interpolating heredoc so that Perl never touches
# any '$', '@' or backslash that might appear in the SPARQL text.
#
# NOTE(review): both sample strings end with ';' but are split on "; ", so the
# last list element presumably keeps its trailing ';' (e.g. "3150248;") --
# confirm against the behaviour of ex:split.
my $sparql = <<'END';
PREFIX ex: <http://example.org/>
CONSTRUCT {
	?project ex:principalInvestigatorContact ?piContact ;
		ex:principalInvestigator ?pi .
	?researcher a ex:Researcher ;
		ex:name ?name .
}
WHERE {
	# Original data
	VALUES (?project_id ?ids ?names) {
		(
			"123"
			"1858722 (contact); 1883064; 3150248;"
			"BUCK, JOCHEN (contact); LEVIN, LONNY R; VISCONTI, PABLO E.;"
		)
	}

	# Split names and ids into individual records, contained in a ex:List-typed
	# literal.
	BIND(ex:split(?ids, "; ") AS ?idList)
	BIND(ex:split(?names, "; ") AS ?nameList)

	# Make a single list of (name, id) pairs
	BIND(ex:zip(?nameList, ?idList) AS ?pairs)

	# Make one result per (name, id) pair
	EXPLODE(?pairs AS ?pair)

	# Extract the name and id from the pair ("with annotation" because they
	# might contain the trailing " (contact)" string)
	BIND(ex:listGet(?pair, 0) AS ?nameWithAnnotation)
	BIND(ex:listGet(?pair, 1) AS ?idWithAnnotation)

	# Strip off the " (contact)" annotation, if present
	BIND(REPLACE(?nameWithAnnotation, " [(]contact[)]", "") AS ?name)
	BIND(REPLACE(?idWithAnnotation, " [(]contact[)]", "") AS ?id)

	# Set a flag if this record is marked as the contact
	BIND(STRENDS(?idWithAnnotation, " (contact)") AS ?isContact)

	# Construct the ?researcher IRI
	BIND(URI(CONCAT("researcher/", ?id)) AS ?researcher)

	# Construct the ?project
	BIND(URI(CONCAT("project/", ?project_id)) AS ?project)

	# ?undef is never bound anywhere in this query, so each IF() below yields
	# either the bound ?researcher or an unbound value. Passing the unbound
	# value to IRI() raises an error, and a BIND whose expression errors
	# leaves its variable unbound. Net effect:
	#   ?piContact is bound only when ?isContact is true;
	#   ?pi        is bound only when ?isContact is false.
	# Template triples that mention an unbound variable are not emitted by
	# CONSTRUCT, so each researcher ends up under exactly one predicate.
	BIND(IRI(IF(?isContact, ?researcher, ?undef)) AS ?piContact)
	BIND(IRI(IF(?isContact, ?undef, ?researcher)) AS ?pi)
}
END

# Set up an empty in-memory model; all data used by the query comes from its
# own VALUES clause, so nothing is loaded into the store.
my $parser = Attean->get_parser('SPARQL')->new();
my $store  = Attean->get_store('Memory')->new();
my $model  = Attean::MutableQuadModel->new( store => $store );
my $graph  = Attean::IRI->new('http://example.org/graph');
my $e      = Attean::SimpleQueryEvaluator->new( model => $model, default_graph => $graph );

# parse() is called in list context; only the first returned item is kept and
# used as the query algebra.
my ($algebra) = $parser->parse($sparql);
my $results   = $e->evaluate($algebra, $graph);

# Write the constructed triples to STDOUT as Turtle.
my $serializer = Attean->get_serializer('turtle')->new();
$serializer->serialize_iter_to_io(\*STDOUT, $results);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment