Skip to content

Instantly share code, notes, and snippets.

@cbeer
Created January 28, 2011 19:19
Show Gist options
  • Save cbeer/800777 to your computer and use it in GitHub Desktop.
Save cbeer/800777 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'curb'
require 'trollop'
require 'activesupport'
# Lookup table from a SKOS property name to the numeric id that is posted as
# concept_property[skos_property_id] when a concept property is saved.
# NOTE(review): the numbers are presumably the registry's own property ids --
# confirm against metadataregistry.org before changing any of them.
skos_property_id = {
  # labels and notation
  "alternative-label" => 1,
  "hidden-label" => 9,
  "notation" => 30,
  # documentation notes
  "note" => 17,
  "definition" => 5,
  "example" => 7,
  "scope-note" => 22,
  "editorial-note" => 6,
  "change-note" => 4,
  "history-note" => 10,
  # relations between concepts
  "has-broader" => 3,
  "has-narrower" => 16,
  "related-to" => 21,
  # mapping relations
  "is-in-mapping-relation-with" => 37,
  "has-broader-match" => 32,
  "has-narrower-match" => 33,
  "has-related-match" => 34,
  "has-close-match" => 36,
  "has-exact-match" => 35
}
# Command-line interface. The vocabulary file is taken as a positional
# argument (see the usage banner) rather than as an option.
opts = Trollop.options do
  version("metadataregistry.org ingest 0.0.1 (c) 2011 Chris Beer")
  banner(
    "Batch ingest vocabulary items into metadataregistry.org\n" \
    "Usage:\n" \
    "ingest.rb [options] <filename>\n" \
    "where [options] are:\n"
  )

  # Post to sandbox.metadataregistry.org instead of the production host.
  opt(:sandbox, "Use sandbox")
  # opt :file, "Filename to controlled vocabulary document", :type => String
  # Sent as vocabulary[agent_id] when the vocabulary is created.
  opt(:owner, "metadataregistry.org owner id", :type => :int)
  # Prefix for the vocabulary URI and for every concept URI.
  opt(:base, "base domain for vocabulary", :type => String, :default => '')
  # Assigned to the cookies of every request the script makes.
  opt(:cookies, "metadataregistry.org authentication token", :type => String)
end
# Run-wide settings derived from the parsed options and the positional
# filename argument.
OWNER_ID = opts[:owner]
BASE_DOMAIN = opts[:base]
COOKIES = opts[:cookies]

# Path of the vocabulary file (one term per line). Validate it before any
# request is made: without this check a missing argument surfaces as a bare
# TypeError from File.basename, and an unreadable file is only discovered
# after the vocabulary has already been created on the registry.
FILE_NAME = ARGV.first
Trollop::die "need a vocabulary file name" if FILE_NAME.nil?
Trollop::die "cannot read vocabulary file #{FILE_NAME}" unless File.file?(FILE_NAME) && File.readable?(FILE_NAME)

# Vocabulary token: the file name without directory or extension, with every
# underscore turned into a path separator (e.g. "a_b.txt" => "a/b").
BASE_NAME = File.basename(FILE_NAME, File.extname(FILE_NAME)).tr('_', '/')

# Root URL of the registry instance that receives the requests.
METADATA_REGISTRY = opts[:sandbox] ? "http://sandbox.metadataregistry.org" : "http://metadataregistry.org"
# Create the vocabulary on the registry, then read the id of the new
# vocabulary back from the Location header of the response.
c = Curl::Easy.new(METADATA_REGISTRY + "/vocabulary/save.html")
c.cookies = COOKIES
c.http_post(
  Curl::PostField.content('vocabulary[agent_id]', OWNER_ID),
  Curl::PostField.content('vocabulary[name]', "PBCore #{BASE_NAME}"),
  Curl::PostField.content('vocabulary[url]', BASE_DOMAIN + BASE_NAME),
  Curl::PostField.content('vocabulary[note]', ''),
  Curl::PostField.content('vocabulary[community]', ''),
  Curl::PostField.content('vocabulary[status_id]', 1),
  Curl::PostField.content('vocabulary[language]', 'en'),
  Curl::PostField.content('vocabulary[base_domain]', BASE_DOMAIN),
  Curl::PostField.content('vocabulary[token]', BASE_NAME),
  Curl::PostField.content('vocabulary[uri]', BASE_DOMAIN + BASE_NAME)
)
print c.header_str

# The id is the first run of digits in the Location header. The match is
# anchored and case-insensitive so that Content-Location is not mistaken for
# it and a lowercase header name is still found.
vocab_location = c.header_str.split("\r\n").find { |line| line =~ /\ALocation:/i }
VOCAB_ID = vocab_location && vocab_location.scan(/\d+/).first

# Without an id no concept can be attached, so stop with a readable message
# instead of failing later with a NoMethodError on nil.
abort "Could not create the vocabulary: the response carried no Location header with a numeric id" unless VOCAB_ID
# Ingest every term of the vocabulary file as a concept. After a concept is
# saved it is linked with "has-broader" to each previously ingested concept
# whose slug is a leading run of this term's hyphen-separated words (for
# example "a-b-c" gets "a-b" and "a" as broader concepts). Only concepts
# already saved can be linked, so broader terms have to come first in the file.

# slug (parameterized term) => concept id returned by the registry
term_to_id = {}

# File.read closes the handle and, unlike Kernel#open, never treats a name
# starting with "|" as a command. Blank lines are dropped so that they are not
# posted as empty concepts.
terms = File.read(FILE_NAME).split("\n").map(&:strip).reject(&:empty?)

c = Curl::Easy.new(METADATA_REGISTRY + "/concept/save.html")
c.cookies = COOKIES

# One handle is reused for every relation request.
cp = Curl::Easy.new(METADATA_REGISTRY + "/conceptprop/save.html")
cp.cookies = COOKIES

terms.each do |t|
  slug = t.parameterize

  print "-- #{t} -- \n"
  c.http_post(
    Curl::PostField.content('vocabulary_id', VOCAB_ID),
    Curl::PostField.content('concept[pref_label]', t),
    Curl::PostField.content('concept[uri]', "#{BASE_DOMAIN}#{BASE_NAME}##{slug}"),
    Curl::PostField.content('status_id', 2),
    Curl::PostField.content('language', 'en'),
    Curl::PostField.content('save', 'save')
  )
  print c.header_str
  print c.post_body

  # The concept id is the first run of digits in the Location header; stop
  # with a readable message when the registry did not send one.
  location = c.header_str.split("\r\n").find { |line| line =~ /\ALocation:/i }
  concept_id = location && location.scan(/\d+/).first
  abort "Could not create concept #{t.inspect}: the response carried no Location header with a numeric id" unless concept_id
  term_to_id[slug] = concept_id

  # Every proper leading run of words ("a", "a-b" for "a-b-c") is a candidate
  # broader concept; the full slug itself is excluded by the range.
  words = slug.split('-')
  (1...words.size).each do |count|
    broader_id = term_to_id[words.first(count).join('-')]
    next unless broader_id

    cp.http_post(
      Curl::PostField.content('concept_id', concept_id),
      Curl::PostField.content('concept_property[skos_property_id]', skos_property_id['has-broader']),
      Curl::PostField.content('concept_property[scheme_id]', VOCAB_ID),
      Curl::PostField.content('concept_property[related_concept_id]', broader_id),
      Curl::PostField.content('concept_property[language]', 'en'),
      Curl::PostField.content('concept_property[status_id]', 1)
    )
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment