Skip to content

Instantly share code, notes, and snippets.

@the-frey
Created May 29, 2014 09:59
Show Gist options
  • Save the-frey/c88336d23a545884c9ef to your computer and use it in GitHub Desktop.
Save the-frey/c88336d23a545884c9ef to your computer and use it in GitHub Desktop.
Rakefile for a Rake task that will download all named graphs from Fuseki and upload to Stardog.
require 'json'
require 'uri'
require 'rest_client'
# Holds the name, credentials and port used to reach a database.
#
# Only the database name is required. A username, password or port that is
# omitted (or passed as nil/false) falls back to 'admin', 'admin' and 5820.
class DatabaseDetails
  attr_accessor :database_name
  attr_accessor :username
  attr_accessor :password
  attr_accessor :port

  # database_name - name of the database
  # username      - login name (default 'admin')
  # password      - login password (default 'admin')
  # port          - port number (default 5820)
  def initialize(database_name, username = nil, password = nil, port = nil)
    username ||= 'admin'
    password ||= 'admin'
    port ||= 5820

    @database_name, @username, @password, @port = database_name, username, password, port
  end
end
# Connection details for a Fuseki server; adds nothing to DatabaseDetails.
class FusekiConnection < DatabaseDetails
end

# Connection details for a Stardog server; adds nothing to DatabaseDetails.
class StardogConnection < DatabaseDetails
end
# Helpers for reading named graphs from a Fuseki server on localhost and
# saving them as Turtle files under "<app root>/data_backup".
module FusekiExport
  # Returns the application root as a path two directories above this file.
  def self.template_app_root
    "#{File.dirname(__FILE__)}/../.."
  end

  # Asks Fuseki for every distinct named graph that contains at least one triple.
  #
  # fuseki_instance - object responding to #port and #database_name
  #
  # Returns the RestClient response. JSON results are requested explicitly
  # so the body can be parsed with JSON.parse regardless of server defaults.
  # RestClient raises on HTTP error statuses; that is not rescued here.
  def self.fuseki_all_graphs_query(fuseki_instance)
    query = "PREFIX dcterms: <http://purl.org/dc/terms/>\r\nPREFIX owl: <http://www.w3.org/2002/07/owl#>\r\nPREFIX qb: <http://purl.org/linked-data/cube#>\r\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\r\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\r\nPREFIX skos: <http://www.w3.org/2004/02/skos/core#>\r\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\r\n\r\nSELECT DISTINCT ?g\r\nWHERE { \r\n GRAPH ?g {\r\n ?s ?p ?o\r\n }\r\n}"
    RestClient.get(
      "http://localhost:#{fuseki_instance.port}/#{fuseki_instance.database_name}/sparql",
      { :params => { :query => query }, :accept => 'application/sparql-results+json' }
    )
  end

  # Downloads one named graph as Turtle and writes it to
  # "<app root>/data_backup/<filename>.ttl".
  #
  # graph_uri_string - URI of the named graph to fetch
  # fuseki_instance  - object responding to #port and #database_name
  # filename         - base name (without extension) of the backup file
  #
  # Returns [true, turtle_body] when the file was written, otherwise
  # [false, nil]. HTTP errors raised by RestClient are reported and turned
  # into [false, nil] so that one bad graph does not abort a whole export.
  def self.write_graph_to_turtle(graph_uri_string, fuseki_instance, filename)
    puts " Making fuseki data request for #{graph_uri_string}"
    # The graph URI goes through :params so RestClient escapes it; pasted raw
    # into the URL, a '#' or '&' inside the URI would truncate or split it.
    response = RestClient.get(
      "http://localhost:#{fuseki_instance.port}/#{fuseki_instance.database_name}/data",
      { :params => { :graph => graph_uri_string }, :accept => 'text/turtle' }
    )
    body = response.body
    # Plain-Ruby emptiness check: ActiveSupport's blank? is not loaded here.
    return false, nil unless response.code == 200 && !body.to_s.strip.empty?

    path = "#{template_app_root}/data_backup/#{filename}.ttl"
    # The block form of File.open closes the file itself.
    File.open(path, 'w') do |file|
      file.write(body)
      puts " Rough size of created file: #{(file.size.to_f / 2**20)} MB"
    end
    return false, nil unless File.exist?(path)

    [true, body]
  rescue RestClient::Exception => e
    puts " Error: fuseki data request failed: #{e.message}"
    [false, nil]
  end
end
# Makes sure "<app root>/data_backup" exists before the export task runs.
#
# mkdir_p is used instead of shelling out to `mkdir`: it succeeds when the
# directory is already there and the path never passes through a shell.
directory "data_backup" do
  mkdir_p "#{FusekiExport.template_app_root}/data_backup"
end
# Exports every named graph from Fuseki to a Turtle backup file and then
# loads each one into Stardog inside its own transaction.
#
# Passing any value for skip_postcodes (e.g. `rake fuseki_export[true]`)
# leaves the postcodes graph out of the run.
desc "A task to download and store turtle backups of named graphs from Fuseki. All named graphs will then be uploaded to Stardog."
task :fuseki_export, [:skip_postcodes] => ["data_backup"] do |_t, args|
  skip_postcodes = !args[:skip_postcodes].nil?
  postcodes_graph = "http://linked.glasgow.gov.uk/graph/postcodes"

  # database connections
  fuseki = FusekiConnection.new('glasgow-development', nil, nil, 3030)
  stardog = StardogConnection.new('glasgow-development')
  # basic Stardog superuser creds are carried in the URL
  stardog_url = "http://#{stardog.username}:#{stardog.password}@localhost:#{stardog.port}/#{stardog.database_name}"
  # RestClient.post takes (url, payload, headers): headers must be the third
  # argument, otherwise the hash is sent as a form body and no Accept is set.
  accept_text = { :accept => 'text/plain' }

  # gets all named graphs from fuseki at :3030 or the uri specified
  puts "--> Getting data from Fuseki"
  response = FusekiExport.fuseki_all_graphs_query(fuseki)
  puts "--> Request status: #{response.code}"
  graphs_loaded = response.code == 200
  puts "--> graphs variable populated? #{graphs_loaded}"
  abort "--> Could not list named graphs from Fuseki (status #{response.code})." unless graphs_loaded
  graphs = JSON.parse(response.body)

  # Drop the skipped graph before counting so the totals reflect what is
  # actually processed.
  graph_uris = graphs["results"]["bindings"].map { |row| row["g"]["value"] }
  graph_uris.delete(postcodes_graph) if skip_postcodes
  total = graph_uris.length
  progress = 0

  graph_uris.each_with_index do |graph_uri_string, index|
    puts "\n-> Starting processing for ##{index + 1} of #{total}: #{progress}/#{total} successful."

    # save each graph to a turtle file
    filename = graph_uri_string.sub(/^https?:\/\/.*(?:graph)\//, '').gsub(/\//, '|')
    successfully_saved_to_file, graph_turtle_data = FusekiExport.write_graph_to_turtle(graph_uri_string, fuseki, filename)
    unless successfully_saved_to_file
      puts " Error: <#{graph_uri_string}> not saved to file."
      next
    end
    puts " Successfully saved graph <#{graph_uri_string}> to file: \n <#{filename}.ttl>."

    transaction_token = nil
    begin
      # Beginning the transaction is inside the rescue so a failure here
      # skips this graph instead of aborting the whole run.
      transaction_token = RestClient.post("#{stardog_url}/transaction/begin", '', accept_text).body

      puts " Sending data to Stardog..."
      # submit data to Stardog endpoint; the graph URI is escaped because it
      # is itself a URI embedded in a query string
      stardog_graph_post_response = RestClient.post(
        "#{stardog_url}/#{transaction_token}/add?graph-uri=#{URI.encode_www_form_component(graph_uri_string)}",
        graph_turtle_data,
        { :content_type => 'text/turtle' }
      )
      puts " Posting data complete: #{stardog_graph_post_response.code}, ready to commit"

      # commit the transaction
      RestClient.post("#{stardog_url}/transaction/commit/#{transaction_token}", '', accept_text)
      puts " Transaction complete for: #{graph_uri_string}"
      progress += 1
    rescue => transaction_exception
      if transaction_token.nil?
        # Nothing to roll back: the transaction never started.
        puts " Data could not be uploaded to Stardog. Transaction could not be started. Error: #{transaction_exception}"
        next
      end

      # posting data to Stardog named graph has failed so we roll back the transaction
      begin
        RestClient.post("#{stardog_url}/transaction/rollback/#{transaction_token}", '', accept_text)
        puts " Data could not be uploaded to Stardog. Transaction rolled back. Error: #{transaction_exception}"
      rescue => rollback_exception
        puts " Error: exception in transaction: #{transaction_exception}\n Error: #{rollback_exception} - rollback failed"
      end
    end
  end

  puts "\n\n#{progress}/#{total} operations completed without errors."
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment