Skip to content

Instantly share code, notes, and snippets.

@yoursdearboy
Created December 5, 2016 10:57
Show Gist options
  • Save yoursdearboy/132a56e79869f7e5e42de8bb93f6eeee to your computer and use it in GitHub Desktop.
Save yoursdearboy/132a56e79869f7e5e42de8bb93f6eeee to your computer and use it in GitHub Desktop.
Class and script to make JSON/CSV from the ICF classification
#!/usr/bin/env ruby
# The input is an HTML file saved from http://apps.who.int/classifications/icfbrowser/Default.aspx
# with the tree fully unfolded.
# The site fetches nested entries via Ajax, so unfold every node manually (or auto-click them from the JS console) before saving.
require 'optparse'
require 'nokogiri'
require 'json'
require 'csv'
# One node of the classification tree parsed from the browser HTML.
#
# Members:
#   id       - sequential number, handed out in document (pre-order) order
#   code     - leading code token of the label, or nil when there is none
#   title    - label text without the code
#   children - Array of child ICFEntry objects, or nil for a leaf
class ICFEntry < Struct.new(:id, :code, :title, :children)
  # A label is "<code> <title>" where the code is one non-space character
  # followed by optional digits (the pattern accepts e.g. "b", "b1", "b110").
  # Anchored at the start of the label so that a label without a leading code
  # is kept whole instead of being split somewhere in the middle.
  LABEL_PATTERN = /\A(\S\d*)\s(.*)/

  class << self
    # Builds the entry tree from the tree container element.
    #
    # src - object responding to #css; its '> div' children are scanned for
    #       <table> label nodes, each optionally followed by a <div> that
    #       holds the nested entries.
    #
    # Returns the first top-level ICFEntry, or nil when no table is found.
    def make(src)
      make_node(src.css('> div')).first
    end

    private

    # Returns the next sequential id. The counter lives on the class and is
    # never reset, so ids keep increasing across calls to .make.
    def gen_id
      @last_id = @last_id.to_i + 1
    end

    # Converts every <table> child of +node+ into an ICFEntry.
    # A table's nested entries live in the <div> that immediately follows it.
    #
    # Returns an Array of ICFEntry (empty when +node+ has no table children).
    def make_node(node)
      # Fetch the child list once; positions are then known without a
      # per-table linear search.
      siblings = node.children.to_a
      tables = siblings.each_with_index.select { |child, _idx| child.name == 'table' }

      tables.map do |tbl, idx|
        id = gen_id # allocate before descending so ids follow document order
        code, title = split_label(tbl.text.strip)
        follower = siblings[idx + 1]
        children = make_node(follower) if follower && follower.name == 'div'
        new(id, code, title, children)
      end
    end

    # Splits a label into [code, title]; code is nil when the label does not
    # start with a code token.
    def split_label(label)
      match = LABEL_PATTERN.match(label)
      match ? match.captures : [nil, label]
    end
  end

  # Full nested Hash form: {code:, title:} plus :children for non-leaf nodes.
  # The id is intentionally not included.
  def to_h
    out = { code: code, title: title }
    out[:children] = children.map { |child| child.to_h } unless children.nil?
    out
  end

  # Short nested form: a leaf becomes its "code title" String, a node with
  # children becomes {title: "code title", children: [...]}.
  def to_sh
    return code_title if children.nil?

    { title: code_title, children: children.map { |child| child.to_sh } }
  end

  # Flattens the subtree into rows of [code, title, parent_id] in pre-order.
  #
  # parent - the parent ICFEntry, or nil for the root of the traversal
  # acc    - Array the rows are appended to
  #
  # Returns acc.
  def to_arr(parent = nil, acc = [])
    acc << [code, title, parent && parent.id]
    (children || []).each { |child| child.to_arr(self, acc) }
    acc
  end

  # "code title" label; just the title when the entry has no code, so that
  # code-less entries do not get a stray leading space.
  def code_title
    [code, title].compact.join(' ')
  end

  # CSV text with a "code,title,parent" header row followed by one row per
  # entry of the subtree (see #to_arr).
  def to_csv
    CSV.generate(write_headers: true, headers: %w[code title parent]) do |csv|
      to_arr.each { |row| csv << row }
    end
  end

  # Serialises the subtree to JSON.
  #
  # short - truthy selects the short form (#to_sh), otherwise the full form
  #         (#to_h) is used.
  #
  # The JSON generator calls #to_json(state) on objects nested in arrays or
  # hashes and on JSON.generate(entry). That state object must not be mistaken
  # for the +short+ flag, so it selects the full form and is passed on.
  def to_json(short = false, *)
    if short.is_a?(JSON::State)
      to_h.to_json(short)
    elsif short
      to_sh.to_json
    else
      to_h.to_json
    end
  end
end
# Command-line entry point: parse the options, load the saved HTML page,
# build the entry tree and print it in the requested format.
options = { short: false }
options_parser = OptionParser.new do |opts|
  opts.banner = "Usage: convert.rb [options] html-source-file"
  opts.on("-t [TO]", "--to [TO]", [:csv, :json], "Output format") do |v|
    options[:to] = v
  end
  opts.on("-s", "--short", "Use short json representation") do
    options[:short] = true
  end
end

begin
  options_parser.parse!
rescue OptionParser::ParseError => e
  # Unknown option or unsupported --to value: explain instead of a backtrace.
  warn e.message
  puts options_parser
  exit(1)
end

filename = ARGV.pop
# Both the input file and an output format are required; without a format
# there would be nothing to print.
unless filename && options[:to]
  puts options_parser
  exit(1)
end

TREE_ID = "ctl00_ContentPlaceHolder1_tree".freeze

# File.open (not Kernel#open) so a file name starting with "|" is never run
# as a command, and the block form closes the handle after parsing.
begin
  src = File.open(filename) { |file| Nokogiri::HTML(file) }
rescue SystemCallError => e
  abort("Cannot read #{filename}: #{e.message}")
end

tree = src.css("##{TREE_ID}").first
abort("No element with id #{TREE_ID} found in #{filename}") unless tree

dict = ICFEntry.make(tree)
abort("No entries found under ##{TREE_ID} in #{filename}") unless dict

case options[:to]
when :csv
  puts dict.to_csv
when :json
  puts dict.to_json(options[:short])
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment