Skip to content

Instantly share code, notes, and snippets.

@moustaki
Created October 1, 2009 13:47
Show Gist options
  • Save moustaki/198966 to your computer and use it in GitHub Desktop.
Save moustaki/198966 to your computer and use it in GitHub Desktop.
require 'rubygems'
require '4store-ruby'
require 'rdf/redland'
# Open a handle on the 4store SPARQL endpoint. The options hash carries the
# client certificate/key file names and a 'soft-limit' setting; their exact
# interpretation is up to the 4store-ruby gem.
# NOTE(review): 'soft-limit' => "-1" presumably switches the server-side
# soft limit off -- confirm against the 4store-ruby documentation.
sparql_endpoint = 'https://server11.incubator.bbc.co.uk/sparql/'
store_options = {
  'certificate' => 'cert.ca',
  'key' => 'cert.pem',
  'soft-limit' => "-1"
}
store = FourStore::Store.new(sparql_endpoint, store_options)
# Collect every distinct object of an owl:sameAs statement in the store.
sameas_query = "
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT DISTINCT ?o WHERE {
?s owl:sameAs ?o
}
"
results = store.select(sameas_query)

# Each result row is indexed by variable name; keep only the ?o binding.
uris = results.map { |row| row["o"] }
# Load every URI in `uris` that the store does not yet hold as a named graph.
#
# A fixed pool of worker threads drains a shared queue. For each URI a worker
# asks the store whether the graph <uri> contains any triple; if not, it
# fetches and parses the URI with Redland, serialises the result as Turtle and
# writes it into the store under that graph name.
#
# Reads the top-level locals `store` and `uris` defined earlier in the script.
require 'thread' # Queue lives here on older Rubies; a harmless no-op on newer ones

num_threads = 10

# Hand the work out through a Queue rather than shifting a shared Array:
# taking an item and detecting "nothing left" become one atomic step, so a
# worker can never end up processing a nil URI when the list runs dry.
pending = Queue.new
uris.each { |uri| pending << uri }

threads = (1..num_threads).map do |i|
  Thread.new(i) do |j|
    loop do
      puts "Thread #{j} - picking and loading"
      uri = begin
        pending.pop(true) # non-blocking pop: raises ThreadError once the queue is empty
      rescue ThreadError
        nil
      end
      break if uri.nil?

      begin
        puts "Checking #{uri}"
        # LIMIT 1: only the existence of a triple matters, not the graph's contents.
        r = store.select("SELECT * WHERE { GRAPH <#{uri}> { ?s ?p ?o } } LIMIT 1")
        if r.size == 0
          puts "Loading #{uri}"
          storage = Redland::TripleStore.new("hashes", "storage", "new='yes',hash-type='memory'")
          # Pass the local `storage`; the script previously passed `@storage`,
          # an unset instance variable (nil), so the store built above was unused.
          model = Redland::Model.new(storage)
          parser = Redland::Parser.new
          parser.parse_into_model(model, uri)
          turtle = model.to_string("", nil, 'application/x-turtle')
          puts turtle
          store.set(uri, turtle)
        else
          puts "Already loaded #{uri}"
        end
      rescue StandardError => e
        # One unreachable or unparsable URI must not take the worker down with
        # it; report the failure and carry on with the next queue entry.
        warn "Thread #{j} - failed on #{uri}: #{e.class}: #{e.message}"
      end
    end
  end
end

# Block until every worker has drained the queue and finished.
threads.each { |thread| thread.join }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment