Created
May 29, 2014 09:59
-
-
Save the-frey/c88336d23a545884c9ef to your computer and use it in GitHub Desktop.
Rakefile for a Rake task that will download all named graphs from Fuseki and upload to Stardog.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'json'
require 'uri'

require 'rest_client'
# Connection settings for a triple-store database.
#
# Any credential or port passed as nil falls back to its default
# ('admin' / 'admin' / 5820).
class DatabaseDetails
  attr_accessor :database_name, :username, :password, :port

  # @param database_name [String] name of the database to talk to
  # @param username [String, nil] login name; 'admin' when nil
  # @param password [String, nil] login password; 'admin' when nil
  # @param port [Integer, nil] port of the server; 5820 when nil
  def initialize(database_name, username = nil, password = nil, port = nil)
    @database_name = database_name
    # `||` rather than default arguments: callers pass explicit nils to skip
    # a positional argument and still expect the default.
    @username = username || 'admin'
    @password = password || 'admin'
    @port = port || 5820
  end
end
# Connection settings for the Fuseki server that graphs are read from.
class FusekiConnection < DatabaseDetails
end

# Connection settings for the Stardog server that graphs are written to.
class StardogConnection < DatabaseDetails
end
# Helpers for pulling named graphs out of a Fuseki server on localhost and
# storing them as Turtle files under <template_app_root>/data_backup.
module FusekiExport
  # @return [String] path two directories above the directory of this file
  def self.template_app_root
    "#{File.dirname(__FILE__)}/../.."
  end

  # @param filename [String] base name of the backup file (no extension)
  # @return [String] path of the Turtle backup file for +filename+
  def self.backup_path(filename)
    "#{template_app_root}/data_backup/#{filename}.ttl"
  end

  # Runs a SPARQL SELECT listing every distinct named graph.
  #
  # @param fuseki_instance [#port, #database_name] connection settings
  # @return the RestClient response of the SPARQL request
  def self.fuseki_all_graphs_query(fuseki_instance)
    query = "PREFIX dcterms: <http://purl.org/dc/terms/>\r\nPREFIX owl: <http://www.w3.org/2002/07/owl#>\r\nPREFIX qb: <http://purl.org/linked-data/cube#>\r\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\r\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\r\nPREFIX skos: <http://www.w3.org/2004/02/skos/core#>\r\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\r\n\r\nSELECT DISTINCT ?g\r\nWHERE { \r\n GRAPH ?g {\r\n ?s ?p ?o\r\n }\r\n}"
    RestClient.get "http://localhost:#{fuseki_instance.port}/#{fuseki_instance.database_name}/sparql", { :params => { :query => query } }
  end

  # Downloads one named graph as Turtle and writes it to its backup file.
  #
  # @param graph_uri_string [String] URI of the named graph to fetch
  # @param fuseki_instance [#port, #database_name] connection settings
  # @param filename [String] base name of the backup file (no extension)
  # @return [Array(Boolean, String)] [true, turtle] when the file was written,
  #   [false, nil] when the request failed or returned no data
  def self.write_graph_to_turtle(graph_uri_string, fuseki_instance, filename)
    puts " Making fuseki data request for #{graph_uri_string}"
    # The graph URI goes through :params (as the SPARQL query does) so that
    # characters such as '#' and '&' are escaped instead of cutting the
    # query string short.
    response = RestClient.get(
      "http://localhost:#{fuseki_instance.port}/#{fuseki_instance.database_name}/data",
      { :params => { :graph => graph_uri_string }, :accept => 'text/turtle' }
    )
    turtle = response.body
    # Plain-Ruby emptiness check; does not rely on ActiveSupport's blank?.
    return [false, nil] unless response.code == 200 && !turtle.to_s.strip.empty?

    path = backup_path(filename)
    # Block form closes the file on exit, so no explicit close is needed.
    File.open(path, 'w') do |file|
      file.write(turtle)
      puts " Rough size of created file: #{(file.size.to_f / 2**20)} MB"
    end
    File.exist?(path) ? [true, turtle] : [false, nil]
  rescue RestClient::Exception => request_error
    # rest-client raises for non-2xx responses; report the graph as failed so
    # the caller can carry on with the remaining graphs.
    puts " Error: fuseki data request failed: #{request_error.message}"
    [false, nil]
  end
end
# Prerequisite of :fuseki_export. Ensures the backup directory under
# FusekiExport.template_app_root exists; mkdir_p succeeds when the directory
# is already there and copes with spaces in the path, unlike shelling out to
# plain `mkdir`.
directory "data_backup" do
  mkdir_p File.join(FusekiExport.template_app_root, "data_backup")
end
# HTTP calls against a Stardog server on localhost, used by :fuseki_export.
module StardogImport
  # @param stardog [#username, #password, #port, #database_name]
  # @return [String] database URL with the credentials embedded
  # NOTE(review): credentials are interpolated unescaped; a password with
  # URL-special characters would need escaping - confirm before changing creds.
  def self.database_url(stardog)
    "http://#{stardog.username}:#{stardog.password}@localhost:#{stardog.port}/#{stardog.database_name}"
  end

  # POSTs to a transaction endpoint (begin, commit/<id>, rollback/<id>).
  # RestClient.post takes (url, payload, headers): the empty payload makes
  # sure the :accept hash is sent as headers and not as a form body.
  def self.post_transaction(stardog, path)
    RestClient.post "#{database_url(stardog)}/transaction/#{path}", '', { :accept => :text }
  end

  # Adds +turtle+ to the named graph inside a single transaction.
  #
  # @param stardog [#username, #password, #port, #database_name]
  # @param graph_uri_string [String] URI of the target named graph
  # @param turtle [String] Turtle serialisation of the graph
  # @return [Boolean] true when the transaction was committed
  def self.upload_graph(stardog, graph_uri_string, turtle)
    transaction_token = nil
    transaction_token = post_transaction(stardog, 'begin').body
    puts " Sending data to Stardog..."
    # Escape the graph URI so '#', '&' etc. survive as a query parameter.
    add_url = "#{database_url(stardog)}/#{transaction_token}/add?graph-uri=#{URI.encode_www_form_component(graph_uri_string)}"
    stardog_graph_post_response = RestClient.post add_url, turtle, { :content_type => 'text/turtle' }
    puts " Posting data complete: #{stardog_graph_post_response.code}, ready to commit"
    post_transaction(stardog, "commit/#{transaction_token}")
    puts " Transaction complete for: #{graph_uri_string}"
    true
  rescue => transaction_exception
    rollback(stardog, transaction_token, transaction_exception)
    false
  end

  # Rolls back a failed transaction and reports the outcome on stdout.
  def self.rollback(stardog, transaction_token, transaction_exception)
    if transaction_token.nil?
      # Nothing was opened, so there is nothing to roll back.
      puts " Error: could not begin Stardog transaction: #{transaction_exception}"
      return
    end
    post_transaction(stardog, "rollback/#{transaction_token}")
    puts " Data could not be uploaded to Stardog. Transaction rolled back. Error: #{transaction_exception.to_s}"
  rescue => rollback_exception
    puts " Error: exception in transaction: #{transaction_exception}\n Error: #{rollback_exception.to_s} - rollback failed"
  end
end

desc "A task to download and store turtle backups of named graphs from Fuseki. All named graphs will then be uploaded to Stardog."
task :fuseki_export, [:skip_postcodes] => ["data_backup"] do |_t, args|
  # Any value given for skip_postcodes turns the skip on.
  skip_postcodes = !args[:skip_postcodes].nil?

  # database connections
  fuseki = FusekiConnection.new('glasgow-development', nil, nil, 3030)
  stardog = StardogConnection.new('glasgow-development')

  # gets all named graphs from fuseki at :3030 or the uri specified
  puts "--> Getting data from Fuseki"
  response = FusekiExport.fuseki_all_graphs_query(fuseki)
  puts "--> Request status: #{response.code}"
  graphs = JSON.parse(response.body) if response.code == 200
  puts "--> graphs variable populated? #{response.code == 200}"
  # Without the graph list there is nothing to export; stop with a clear
  # message instead of failing on nil below.
  abort "--> Could not list named graphs from Fuseki (status #{response.code})" if graphs.nil?

  graph_uris = graphs["results"]["bindings"].map { |row| row["g"]["value"] }
  # Drop the postcodes graph up front so the totals only count graphs that
  # are actually processed.
  graph_uris.delete("http://linked.glasgow.gov.uk/graph/postcodes") if skip_postcodes

  progress = 0
  total = graph_uris.length
  graphs_with_errors = []

  graph_uris.each_with_index do |graph_uri_string, index|
    puts "\n-> Starting processing for ##{index + 1} of #{total}: #{progress}/#{total} successful."
    # save each graph to a turtle file, named after the part of the URI that
    # follows ".../graph/" with slashes swapped for pipes
    filename = graph_uri_string.sub(/^https?:\/\/.*(?:graph)\//, '').tr('/', '|')
    successfully_saved_to_file, graph_turtle_data = FusekiExport.write_graph_to_turtle(graph_uri_string, fuseki, filename)

    unless successfully_saved_to_file
      puts " Error: <#{graph_uri_string}> not saved to file."
      graphs_with_errors << graph_uri_string
      next
    end

    puts " Successfully saved graph <#{graph_uri_string}> to file: \n <#{filename}.ttl>."
    # The turtle returned above is exactly what was written to the file, so
    # it is uploaded directly instead of reading the file back.
    if StardogImport.upload_graph(stardog, graph_uri_string, graph_turtle_data)
      progress += 1
    else
      graphs_with_errors << graph_uri_string
    end
  end

  puts "\n\n#{progress}/#{total} operations completed without errors."
  unless graphs_with_errors.empty?
    puts "Graphs with errors:"
    graphs_with_errors.each { |uri| puts " <#{uri}>" }
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment