Last active
June 27, 2019 13:41
-
-
Save tfuji/239806e44218889f9d9b3c8f3fe7254d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'nokogiri' | |
#require 'erb' | |
require 'pp' | |
require 'json' | |
#require 'thor' | |
# Usage:
#   1. wget 'https://www.ncbi.nlm.nih.gov/biosample/docs/attributes/?format=xml' -O ncbi_biosample_attributes.xml
#   2. ruby ncbi_biosample_attributes_extend2ttl.rb ncbi_biosample_attributes.xml > biosample_attributes_extend.ttl
# Streams the <Attribute> records of a BioSample attributes XML file.
#
# The file is read line by line. Each run of lines from an opening
# <Attribute ...> line up to the line containing </Attribute> is re-wrapped
# as a standalone XML document and handed to Attribute.new.
class BioSampleAttributes
  include Enumerable

  # Declaration prepended to every extracted record so it parses on its own.
  XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8"?>'.freeze

  # Turtle preamble printed once by #to_ttl (ends with a newline, so `puts`
  # adds none).
  TTL_HEADER = "@base <http://ddbj.nig.ac.jp/ontologies/biosample> .\n" \
               "@prefix : <http://ddbj.nig.ac.jp/ontologies/biosample/> .\n" \
               "@prefix skos: <http://www.w3.org/2004/02/skos/core#> .\n".freeze

  # @param xml [String] path of the attributes XML file
  def initialize(xml)
    @xml = xml
  end

  # Yields one Attribute per record found in the file.
  #
  # Lines containing "<?xml" or "BioSampleAttributes" (the document
  # declaration and the root element) are skipped.
  #
  # @yieldparam attribute [Attribute]
  # @return [self, Enumerator] an Enumerator when no block is given
  def each
    return enum_for(:each) unless block_given?

    buffer = []
    # File.foreach, not IO.foreach: the latter spawns a subprocess when the
    # path starts with "|", and the path comes straight from the command line.
    File.foreach(@xml) do |line|
      next if line.include?('<?xml') || line.include?('BioSampleAttributes')

      buffer.push(XML_DECLARATION) if line =~ /^\s*<Attribute/
      buffer.push(line.chomp)
      next unless line.include?('</Attribute>')

      yield Attribute.new(buffer.join("\n"))
      buffer = []
    end
    self
  end

  # Prints the Turtle preamble followed by every attribute's Turtle output.
  def to_ttl
    puts TTL_HEADER
    each(&:to_ttl)
  end

  # Prints "<harmonized name>\t<format>" for every attribute.
  def format
    each { |attr| puts "#{attr.harmonizedName}\t#{attr.format}" }
  end

  # Prints every attribute's string form, one per line.
  # NOTE(review): this prints instead of returning a String, unlike the
  # usual to_s contract; kept as-is for existing callers.
  def to_s
    each { |attr| puts attr.to_s }
  end

  # Prints every attribute's JSON form, one per line.
  def to_json
    each { |attr| puts attr.to_json }
  end
end
# One <Attribute> record of the BioSample attribute definitions, with
# accessors for its child elements and Turtle / tab-separated / JSON output.
class Attribute
  # Harmonized names whose preferred format is "numeric".
  NUMERIC_NAMES = %w[
    air_temp_regm annual_season_temp host_body_temp ph
    samp_store_temp temp typ_occupant_dens water_temp_regm
  ].freeze

  # Harmonized names whose preferred format is "date".
  DATE_NAMES = %w[birth_date collection_date death_date].freeze

  # Characters that must be backslash-escaped inside a double-quoted
  # Turtle string literal.
  TTL_ESCAPES = {
    '\\' => '\\\\',
    '"' => '\\"',
    "\n" => '\\n',
    "\r" => '\\r',
    "\t" => '\\t'
  }.freeze

  # @param xml [String] an XML document containing an <Attribute> element
  # @raise [NameError] when the document has no <Attribute> element
  def initialize(xml)
    @attr = Nokogiri::XML(xml).css('Attribute')
    # A NodeSet is truthy even when empty, so test emptiness explicitly.
    raise NameError, 'attribute element not found' if @attr.empty?
  end

  # @return [String] text of the <Name> element
  def name
    @attr.css('Name').inner_text
  end

  # @return [String] text of the <HarmonizedName> element
  def harmonizedName
    @attr.css('HarmonizedName').inner_text
  end

  # @return [String] text of the <Format> element
  def format
    @attr.css('Format').inner_text
  end

  # @return [String] "numeric", "date" or "text", chosen by harmonized name
  def preferred_format
    case harmonizedName
    when *NUMERIC_NAMES then 'numeric'
    when *DATE_NAMES then 'date'
    else 'text'
    end
  end

  # @return [String] the texts of all <Synonym> elements joined with "; "
  def synonym
    synonym_texts.join('; ')
  end

  # @return [String] text of the <Description> element
  def description
    @attr.css('Description').inner_text
  end

  # Prints this attribute as Turtle: a skos:altLabel for the name, one
  # skos:hiddenLabel per synonym, then a :preferred_format statement.
  def to_ttl
    subject = ":#{harmonizedName.capitalize}_Attribute"
    hidden_labels = synonym_texts.map do |text|
      " skos:hiddenLabel \"#{ttl_escape(text)}\""
    end

    print "#{subject}\tskos:altLabel\t\"#{ttl_escape(name)}\""
    if hidden_labels.empty?
      puts '.'
    else
      puts ';'
      puts hidden_labels.join("; \n") + '.'
    end
    puts
    puts "#{subject}\t:preferred_format\t\"#{preferred_format}\"."
    puts
  end

  # @return [String] name and synonyms separated by a tab
  def to_s
    [name, synonym].join("\t")
  end

  # Accepts (and forwards) the generator arguments so instances can also be
  # serialized when nested, e.g. JSON.generate([attribute]).
  #
  # @return [String] JSON object with name, harmonizedName, synonym, format
  #   and description
  def to_json(*args)
    {
      name: name,
      harmonizedName: harmonizedName,
      synonym: synonym,
      format: format,
      description: description
    }.to_json(*args)
  end

  private

  # Text content of every <Synonym> element (not the nodes' XML markup).
  def synonym_texts
    @attr.css('Synonym').map(&:inner_text)
  end

  # Escapes text for use inside a double-quoted Turtle literal.
  def ttl_escape(text)
    text.gsub(/[\\"\n\r\t]/, TTL_ESCAPES)
  end
end
# Entry point: convert the XML file named by the first command-line argument
# (or ncbi_biosample_attributes.xml when none is given) to Turtle on stdout.
BioSampleAttributes.new(ARGV.fetch(0, 'ncbi_biosample_attributes.xml')).to_ttl
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment