Last active
May 12, 2022 22:15
-
-
Save zed9h/148874 to your computer and use it in GitHub Desktop.
Six degrees to Kevin Bacon [and other queries] using DBpedia
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use Compress::Raw::Bzip2; | |
use Graph::Undirected; | |
use Storable; | |
use Term::ReadLine; # Term::ReadLine::Gnu | |
# Unbuffered STDOUT, so the "\r" progress line repaints immediately.
$| = 1;

# DBpedia resource name of the actor all distances are measured from.
my $root = 'Kevin_Bacon';

# Infobox dump to parse; available from
# http://downloads.dbpedia.org/3.3/en/infobox_en.nt.bz2
my $input = 'infobox_en.nt.bz2';

# Storable cache files: level 1 holds the graph, level 2 the
# shortest-path tree computed from it.
my ($cache1, $cache2) = ('62kevin.g', '62kevin.sptg');

# Readline history file in the user's home directory.
my $history = $ENV{HOME} . '/.62kevin';

# Actor vertices carry an "a:" prefix inside the graph.
$root = 'a:' . $root;
# Obtain $sptg, the Dijkstra shortest-path tree rooted at $root, from the
# cheapest source available:
#   1. $cache2 - the stored shortest-path tree itself;
#   2. $cache1 - the stored movie/actor graph (the tree is recomputed);
#   3. $input  - the bzip2-compressed triple dump (everything is rebuilt).
# Whatever had to be rebuilt is written back to its cache file.
my $sptg;
if (-s $cache2) {
    printf "loading cache2 (%d bytes) ...\n", -s $cache2;
    # retrieve() returns undef on I/O errors; fail here with a clear message
    # instead of later with "can't call method on undefined value".
    $sptg = retrieve($cache2)
        or die "$cache2: cannot load cache: $!\n";
}
else {
    my $g;
    if (-s $cache1) {
        printf "loading cache1 (%d bytes) ...\n", -s $cache1;
        $g = retrieve($cache1)
            or die "$cache1: cannot load cache: $!\n";
    }
    else {
        $g = build_starring_graph($input);
        printf "saving cache level1 (%d vertices, %d edges) ...\n",
            scalar($g->vertices), scalar($g->edges);
        # A failed save only costs time on the next run: warn and carry on.
        store($g, $cache1)
            or warn "$cache1: cannot save cache: $!\n";
    }
    printf "Dijkstra single-source shortest path (%d vertices, %d edges) ...\n",
        scalar($g->vertices), scalar($g->edges);
    $sptg = $g->SPT_Dijkstra($root);
    printf "saving cache level2 ...\n";
    store($sptg, $cache2)
        or warn "$cache2: cannot save cache: $!\n";
}

# Build the undirected movie/actor graph from a bzip2-compressed dump.
#
# Every line containing
#   <.../resource/MOVIE> <.../property/starring> <.../resource/ACTOR>
# adds the vertices "m:MOVIE" and "a:ACTOR" and an edge between them; all
# other lines are counted and skipped.  Progress is printed to STDOUT.
#
# Parameters: $path - file name of the compressed dump.
# Returns:    the populated Graph::Undirected object.
# Dies:       if the file cannot be opened or read, or on a bzip2 error.
sub build_starring_graph {
    my ($path) = @_;

    my $graph = Graph::Undirected->new(countvertexed => 1, countedged => 1);

    # Open before sizing so a missing file fails with a clear message;
    # :raw keeps the compressed bytes untouched on every platform.
    open my $fh, '<:raw', $path or die "$path: $!";
    my $size = (-s $fh) || 0;
    printf "parse input (%d bytes compressed) ...\n", $size;

    my $bz = Compress::Raw::Bunzip2->new()
        or die "Cannot create bunzip2 object\n";

    # Compiled once instead of once per input line.
    my $res      = qr{http://dbpedia\.org/resource};
    my $prop     = qr{http://dbpedia\.org/property};
    my $starring = qr{<$res/(.*?)> <$prop/starring> <$res/(.*?)>};

    my $num_parsed = 0;
    my $num_added  = 0;
    my $buffer     = '';    # decompressed text not yet terminated by "\n"

    # Count one input line and, if it is a "starring" triple, add it.
    my $add_line = sub {
        my ($line) = @_;
        $num_parsed++;
        return unless $line =~ $starring;
        my ($movie, $actor) = ("m:$1", "a:$2");
        $graph->add_vertex($movie);
        $graph->add_vertex($actor);
        $graph->add_edge($movie, $actor);
        $num_added++;
        return;
    };

    # Print the progress line; $eol is "" while running, "\n" when done.
    my $report = sub {
        my ($eol) = @_;
        # Guard the division: an empty input file has size 0.
        my $percent = $size ? (tell($fh) / $size) * 100 : 100;
        printf "\r%6.2f%% completed (buffer %d) %d triples inserted of %d parsed.%s",
            $percent, length($buffer), $num_added, $num_parsed, $eol;
        return;
    };

    my $last_report = 0;
    my $compressed;
    my $got;
    while ($got = read($fh, $compressed, 4096)) {
        my $decompressed = '';
        my $status = $bz->bzinflate($compressed, $decompressed);
        die "error decompressing: $status"
            unless $status == BZ_OK || $status == BZ_STREAM_END;
        $buffer .= $decompressed if defined $decompressed;

        # Hand every complete line to the parser; the unterminated tail
        # stays in $buffer until a later chunk completes it.
        my $last_nl = rindex($buffer, "\n");
        if ($last_nl >= 0) {
            my $complete = substr($buffer, 0, $last_nl + 1, '');
            while ($complete =~ m{([^\n]*)\n}g) {
                my $line = $1;
                $add_line->($line);
            }
        }

        # Refresh the progress line at most every couple of seconds.
        if (time - $last_report > 2) {
            $last_report = time;
            $report->('');
        }
    }
    # read() returns undef on error and 0 at end of file; tell them apart.
    die "$path: read error: $!" unless defined $got;

    # A final line without a trailing newline would otherwise be lost.
    if (length $buffer) {
        $add_line->($buffer);
        $buffer = '';
    }

    $report->("\n");
    close $fh;
    return $graph;
}
print "ready.\n";
print "type one dbpedia actor resource name per line:\n";

# Interactive lookup: read an actor name, print its distance from the root
# actor and the chain of movies and actors leading there.
# $term stays file-scoped because the END block uses it to save the history.
my $term = Term::ReadLine->new('62kevin');
# Best effort: ReadHistory may be unavailable (the file expects
# Term::ReadLine::Gnu) or the history file may not exist yet.
eval { $term->ReadHistory($history); };

# Root name without its "a:" vertex prefix, used in messages.
(my $r = $root) =~ s{\Aa:}{};

while (defined(my $typed = $term->readline('actor>'))) {
    # Normalise the input: trim it, then join the words with underscores.
    $typed =~ s{\A\s+|\s+\z}{}g;
    next unless length $typed;
    $typed =~ s{\s+}{_}g;
    #$term->addhistory($typed); # XXX automatic on gnu version

    # Upper-case the first letter or digit of every word; unlike \b\w this
    # also treats "_" as a word separator ("kevin_bacon" -> "Kevin_Bacon").
    (my $capitalized = $typed) =~ s{(?<![^\W_])([^\W_])}{\U$1}g;

    # Prefer the capitalised spelling (the established behaviour) and fall
    # back to the name exactly as typed, for names with lower-case words.
    my ($name) = grep { $sptg->has_vertex("a:$_") } $capitalized, $typed;
    $name = $capitalized unless defined $name;
    my $node = "a:$name";

    my $w = $sptg->get_vertex_attribute($node, 'weight');
    unless (defined $w) {
        # Either the root itself or a vertex absent from the tree.
        my $msg = $node eq $root
            ? "$r is the root node, distance 0"
            : "$name not found, may not be connected to $r";
        print $msg, "\n";
        next;
    }

    # Edges only join a movie and an actor, so two edges make one degree.
    my $d = int($w / 2);
    printf "%s is %s degree%s to %s through:\n",
        $name, $d, $d == 1 ? '' : 's', $r;

    # Follow the predecessor attribute 'p' back towards the root.
    my $p = $node;
    while ($p = $sptg->get_vertex_attribute($p, 'p')) {
        my $label = $p;
        # Anchored, so "m:" or "a:" inside a resource name is left alone.
        $label =~ s{\Am:}{movie: } or $label =~ s{\Aa:}{actor: };
        print " $label\n";
    }
}
print "\nshutting down ...\n";
# On exit, save the readline history.  $term is still undefined when the
# script stopped before the prompt was created; then there is nothing to do.
END {
    if ($term) {
        local $@;
        # eval guards against WriteHistory being unavailable (presumably it
        # needs Term::ReadLine::Gnu); the failure is now reported instead of
        # being silently discarded.
        my $saved = eval {
            $term->WriteHistory($history)
                or die "$history: $!\n";
            1;
        };
        if ($saved) {
            print "history file $history wrote.\n";
        }
        else {
            warn "history not saved: $@";
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Actors exactly six "starring" steps away from dbpedia:Kevin_Bacon.
# Relies on Virtuoso's transitive-subquery OPTION extension.
PREFIX dbpedia: <http://dbpedia.org/resource/>
PREFIX dbpprop: <http://dbpedia.org/property/>

SELECT ?actor6 ?step
WHERE
{
  # One step: two objects of dbpprop:starring sharing the same subject.
  {
    SELECT ?actor1 ?actor6
    WHERE
    {
      [] dbpprop:starring ?actor1, ?actor6 .
    }
  }
  OPTION ( transitive,
           t_distinct,
           t_no_cycles,
           t_in (?actor1),
           t_out (?actor6),
           t_step ('step_no') as ?step,
           t_min(6),                      # minimum and maximum both 6:
           t_max(6) ) .                   # only paths of exactly six steps

  # Start of every path.
  FILTER (?actor1 = dbpedia:Kevin_Bacon)
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of transitive "starring" results per step count, starting from
# dbpedia:Kevin_Bacon, ordered by step.
# Relies on Virtuoso's transitive-subquery OPTION extension.
PREFIX dbpedia: <http://dbpedia.org/resource/>
PREFIX dbpprop: <http://dbpedia.org/property/>

SELECT ?step ?count
WHERE
{
  {
    # Aggregate the transitive results by their step number.
    SELECT ?step COUNT(*) as ?count
    WHERE
    {
      # One step: two objects of dbpprop:starring sharing the same subject.
      {
        SELECT ?start ?costar
        WHERE
        {
          [] dbpprop:starring ?start, ?costar .
        }
      }
      OPTION ( transitive,
               t_distinct,
               t_no_cycles,
               t_in (?start),
               t_out (?costar),
               t_step ('step_no') as ?step,
               t_min(1) ) .

      # Start of every path.
      FILTER (?start = dbpedia:Kevin_Bacon)
    }
    GROUP BY ?step
  }
}
ORDER BY ?step
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Game engines (dbo:genre dbr:Game_engine): English name, homepage(s) and
# programming language(s), one row per engine.
# sql:GROUP_CONCAT_DISTINCT is a Virtuoso built-in aggregate.
#
# Prefixes are declared explicitly so the query does not depend on the
# endpoint's predefined namespace list.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dbo:  <http://dbpedia.org/ontology/>
PREFIX dbr:  <http://dbpedia.org/resource/>
PREFIX dbp:  <http://dbpedia.org/property/>

SELECT
  (MIN(?n) AS ?name)
  (sql:GROUP_CONCAT_DISTINCT(?p, " | ") AS ?page)
  # COALESCE picks the English label when it is bound and the raw value
  # otherwise; IF(?ln, ...) tested a possibly-unbound variable instead.
  (sql:GROUP_CONCAT_DISTINCT(COALESCE(?ln, ?l), ", ") AS ?code)
WHERE {
  ?s dbo:genre dbr:Game_engine ;
     rdfs:label ?n .
  FILTER (LANG(?n) = "en")

  OPTIONAL {
    ?s foaf:homepage ?p .
  }

  OPTIONAL {
    ?s dbp:programmingLanguage ?l .
    # The language may be a resource with its own English label.
    OPTIONAL {
      ?l rdfs:label ?ln .
      FILTER (LANG(?ln) = "en")
    }
  }
}
GROUP BY ?s
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cities of Brazil ranked by the sum of the squared distances from the
# city's geometry to two fixed points; the smallest sum comes first.
# NOTE(review): the two points are presumably Sao Paulo and Rio de Janeiro
# given as (longitude, latitude) - confirm.
# Uses Virtuoso bif: functions and its unparenthesised "expr AS ?var" form.
SELECT
  MIN( ( bif:power(bif:st_distance (?geom, bif:st_point(-46.6333, -23.55)), 2)
       + bif:power(bif:st_distance (?geom, bif:st_point(-43.1964, -22.9083)), 2) ) ) AS ?rank
  MIN(?label) AS ?label
  MAX(?pop) AS ?pop
  MAX(?height) AS ?height
  ?city
WHERE
{
  ?city dbpedia-owl:country dbpedia:Brazil ;
        geo:geometry ?geom ;
        dbpprop:elevationM ?height ;
        dbpedia-owl:populationTotal ?pop ;
        rdfs:label ?label .

  # Portuguese labels only.
  FILTER( langMatches( lang(?label), "pt") )
}
GROUP BY ?city
ORDER BY ?rank
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
define input:inference "http://example.com/rules"
# Every ?s that reaches :A through one or more subset/superset links,
# together with the number of steps ?n.
# Relies on Virtuoso's transitive-subquery OPTION extension.
PREFIX : <http://example.com#>

SELECT ?s ?n
WHERE
{
  # End point of every path.
  FILTER( ?container = :A )

  # One step, stated from either side of the relationship.
  {
    SELECT ?s ?container
    WHERE
    {
      { ?s :is_a_subset_of ?container . }
      UNION
      { ?container :is_a_superset_of ?s . }
    }
  }
  OPTION ( transitive,
           t_in(?s),
           t_out(?container),
           t_distinct,
           t_no_cycles,
           t_step('step_no') as ?n,
           t_min(1) ) .
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Values of dbpprop:blackboard per episode, with season number, episode
# number and English episode title, ordered by season and episode.
PREFIX skos:    <http://www.w3.org/2004/02/skos/core#>
PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dbpedia: <http://dbpedia.org/resource/>
PREFIX dbpprop: <http://dbpedia.org/property/>

SELECT ?chalkboard_gag ?season_no ?episode_no ?episode_title
WHERE
{
  ?ep rdfs:label         ?episode_title ;
      dbpprop:episodeNo  ?episode_no ;
      dbpprop:season     ?season_no ;
      dbpprop:blackboard ?chalkboard_gag .

  # English titles only.
  FILTER (langMatches( lang(?episode_title), "EN" ))

  # Keep literal gags, dropping placeholders that start with "None"
  # or consist of a single "-".
  FILTER (isLITERAL(?chalkboard_gag))
  FILTER (!regex(?chalkboard_gag, "^None|^-$", "i"))
}
ORDER BY ?season_no ?episode_no
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sparql
# Paths of one to four p:edge hops between n:2210270 (?s) and n:990 (?o)
# in graph <http://lua.net>; the UNION lets an edge be followed in either
# direction.  Written as an isql statement ("sparql ... ;").
# Relies on Virtuoso's transitive-subquery OPTION extension.
prefix n: <http://lua.net/n/>
prefix p: <http://lua.net/p/>
SELECT ?s ?o ?n ?via ?path
FROM <http://lua.net>
{
  # Both end points are fixed.
  FILTER( ?o = n:990 )
  FILTER( ?s = n:2210270 )
  # One step: a p:edge triple read in either direction.
  { SELECT ?s ?o
    WHERE {
      { ?s p:edge ?o . }
      UNION
      { ?o p:edge ?s . }
    }
  } OPTION ( transitive,
             t_in(?s),
             t_out(?o),
             t_distinct,
             t_no_cycles,
             # Per-step outputs: the ?s value of the step, the step
             # number and the path id.
             t_step(?s) as ?via,
             t_step('step_no') as ?n,
             t_step('path_id') as ?path,
             # Path length between 1 and 4 steps.
             t_min(1),
             t_max(4)
           ) .
}
;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sparql # clear test environment: empty the graph so the script can be re-run
clear graph <http://test.com/>;
sparql # load test data
# Class hierarchy:  owl:Thing <- :A <- :C   and   owl:Thing <- :B <- :D <- :E
# Colours:          :A red, :B blue, :C green, :E black (:D has no colour)
# Individuals:      :a .. :e, one per class
prefix : <http://test.com/>
insert data into graph <http://test.com/> {
  # classes
  :A a owl:Class .
  :B a owl:Class .
  :C a owl:Class .
  :D a owl:Class .
  :E a owl:Class .
  # subclass links
  :A rdfs:subClassOf owl:Thing .
  :B rdfs:subClassOf owl:Thing .
  :C rdfs:subClassOf :A .
  :D rdfs:subClassOf :B .
  :E rdfs:subClassOf :D .
  # colours attached to classes
  :A :color "red" .
  :B :color "blue" .
  :C :color "green" .
  :E :color "black" .
  # one individual per class
  :a rdf:type :A .
  :b rdf:type :B .
  :c rdf:type :C .
  :d rdf:type :D .
  :e rdf:type :E .
};
sparql # show the closest colors of one individual
# Walks rdfs:subClassOf upwards from each class of :e (zero steps allowed,
# so the class itself counts) and keeps the colour found after the fewest
# steps.
define input:default-graph-uri <http://test.com/>
prefix : <http://test.com/>
SELECT ?color
WHERE
{
  :e a ?cls .
  ?cls rdfs:subClassOf ?ancestor
    option ( transitive,
             t_distinct,
             t_step('step_no') as ?dist,
             t_in(?cls),
             t_out(?ancestor),
             t_min(0) ) .
  ?ancestor :color ?color .
}
ORDER BY ?dist
LIMIT 1;
sparql # show all the closest colors of each class
# For every owl:Class, walks rdfs:subClassOf upwards (zero steps allowed)
# and aggregates (step count, colour) pairs: min() selects one pair and
# bif:aref(..., 1) extracts its colour.
# NOTE(review): presumably min() orders the vectors by their first
# element, the step count - confirm against Virtuoso's behaviour.
define input:default-graph-uri <http://test.com/>
prefix : <http://test.com/>
SELECT ?c0
       (bif:aref(min(bif:vector(?dist, ?color)),1)) as ?v # interesting vector aggregation
WHERE
{
  ?c0 a owl:Class .
  ?c0 rdfs:subClassOf ?ancestor
    option ( transitive,
             t_distinct,
             t_step('step_no') as ?dist,
             t_in(?c0),
             t_out(?ancestor),
             t_min(0) ) .
  ?ancestor :color ?color .
}
GROUP BY ?c0;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment