Created
March 15, 2012 00:44
-
-
Save ptrv/2040767 to your computer and use it in GitHub Desktop.
import vienna opendata trees GML file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# Get all tree entries from vienna's open data | |
# service and put them into a sqlite3 database. | |
# | |
# Peter Vasil | |
# Date: 2012-01-15 | |
require 'rubygems' | |
# require 'rexml/document' | |
# include REXML | |
require 'nokogiri' | |
require 'sqlite3' | |
require 'progressbar' | |
$properties_names = [ | |
"BAUMNUMMER", | |
"GEBIET", | |
"STRASSE", | |
"ART", | |
"PFLANZJAHR", | |
"STAMMUMFANG", | |
"KRONENDURCHMESSER", | |
"BAUMHOEHE" | |
] | |
$create_db_str = " | |
CREATE TABLE trees ( | |
treeid INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, | |
idstr TEXT NOT NULL, | |
lon REAL NOT NULL, | |
lat REAL NOT NULL, | |
#{$properties_names[0]} INTEGER, | |
#{$properties_names[1]} TEXT, | |
#{$properties_names[2]} TEXT, | |
#{$properties_names[3]} TEXT, | |
#{$properties_names[4]} INTEGER, | |
#{$properties_names[5]} REAL, | |
#{$properties_names[6]} REAL, | |
#{$properties_names[7]} REAL | |
);" | |
class Tree | |
attr_accessor :idstr, :lon, :lat, :properties | |
def initialize idstr, lon, lat, properties | |
@idstr = idstr | |
@lon = lon | |
@lat = lat | |
@properties = properties | |
end | |
def to_s | |
result = "Tree: " | |
result << "#{idstr}, #{lat}, #{lon}\n" | |
properties.each do |key, val| | |
result << "#{key} => #{val}\n" | |
end | |
result | |
end | |
end | |
# # DOM parser | |
# def parse_trees_xml(xml_file) | |
# gml_file = File.new(xml_file) | |
# doc = Document.new(gml_file) | |
# gml_file.close | |
# | |
# result = Array.new() | |
# root = doc.root | |
# num_elems = root.elements["count(//ogdwien:BAUMOGD)"] | |
# puts num_elems | |
# pbar = ProgressBar.new("Parse", num_elems) | |
# root.each_element("//ogdwien:BAUMOGD") do |elem| | |
# id = elem.attributes["gml:id"] | |
# geo = elem.elements[ | |
# "./ogdwien:SHAPE/gml:Point/gml:pos" | |
# ].text.split(' ', 2) | |
# lon = geo[0].to_f | |
# lat = geo[1].to_f | |
# properties = {} | |
# $properties_names.each do |name| | |
# #puts name | |
# xpath_val = "./ogdwien:#{name}" | |
# properties[name] = elem.elements[xpath_val].text | |
# end | |
# result.push(Tree.new(id, lon, lat, properties)) | |
# pbar.inc | |
# end | |
# pbar.finish | |
# result | |
# end | |
# Stream parser | |
def parse_trees_xml(xml_file) | |
reader = Nokogiri::XML::Reader(File.open(xml_file)) | |
result = Array.new | |
count = 0 | |
reader.each do |node| | |
if node.name == "ogdwien:BAUMOGD" && node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT | |
count += 1 | |
end | |
end | |
# puts count | |
reader = Nokogiri::XML::Reader(File.open(xml_file)) | |
pbar = ProgressBar.new("Parse", count) | |
id = "" | |
lon = 0.0 | |
lat = 0.0 | |
properties = {} | |
start_element = Nokogiri::XML::Reader::TYPE_ELEMENT | |
end_element = Nokogiri::XML::Reader::TYPE_END_ELEMENT | |
reader.each do |node| | |
if node.name == "ogdwien:BAUMOGD" && node.node_type == start_element | |
# puts "BAUMOGD" | |
id = node.attribute("gml:id") | |
pbar.inc | |
elsif node.name == "gml:pos" && node.node_type == start_element | |
# puts "pos" | |
geo = node.inner_xml.split(' ', 2) | |
lon = geo[0] | |
lat = geo[1] | |
elsif node.name == "ogdwien:#{$properties_names[0]}" && node.node_type == start_element | |
properties[$properties_names[0]] = node.inner_xml.to_i | |
elsif node.name == "ogdwien:#{$properties_names[1]}" && node.node_type == start_element | |
properties[$properties_names[1]] = node.inner_xml | |
elsif node.name == "ogdwien:#{$properties_names[2]}" && node.node_type == start_element | |
properties[$properties_names[2]] = node.inner_xml | |
elsif node.name == "ogdwien:#{$properties_names[3]}" && node.node_type == start_element | |
properties[$properties_names[3]] = node.inner_xml | |
elsif node.name == "ogdwien:#{$properties_names[4]}" && node.node_type == start_element | |
properties[$properties_names[4]] = node.inner_xml | |
elsif node.name == "ogdwien:#{$properties_names[5]}" && node.node_type == start_element | |
properties[$properties_names[5]] = node.inner_xml | |
elsif node.name == "ogdwien:#{$properties_names[6]}" && node.node_type == start_element | |
properties[$properties_names[6]] = node.inner_xml | |
elsif node.name == "ogdwien:#{$properties_names[7]}" && node.node_type == start_element | |
properties[$properties_names[7]] = node.inner_xml | |
elsif node.name == "ogdwien:BAUMOGD" && node.node_type == end_element | |
# puts "/BAUMOGD" | |
result.push(Tree.new(id,lon,lat,properties)) | |
id = "" | |
lon = 0.0 | |
lat = 0.0 | |
properties = {} | |
end | |
end | |
pbar.finish | |
result | |
end | |
def prepare_db(db) | |
result = db.execute( | |
"SELECT CASE WHEN tbl_name = 'trees' | |
THEN 1 ELSE 0 END FROM sqlite_master | |
WHERE tbl_name = 'trees' AND type = 'table'" | |
) | |
if result.size == 0 | |
unless result[0] == 1 | |
db.execute($create_db_str) | |
end | |
end | |
end | |
def insert_trees(db, trees, num_trees) | |
actual_num_tree = 0 | |
pbar = ProgressBar.new("Inserting", num_trees) | |
db.transaction | |
trees.each do |elem| | |
# actual_num_tree += 1 | |
db.execute( | |
"INSERT INTO trees (idstr, lon, lat, | |
#{$properties_names[0]}, | |
#{$properties_names[1]}, | |
#{$properties_names[2]}, | |
#{$properties_names[3]}, | |
#{$properties_names[4]}, | |
#{$properties_names[5]}, | |
#{$properties_names[6]}, | |
#{$properties_names[7]}) | |
VALUES (:idstr, :lon, :lat, | |
:p1, :p2, :p3, :p4, :p5,:p6, :p7, :p8)", | |
"idstr" => elem.idstr, | |
"lon" => elem.lon, | |
"lat" => elem.lat, | |
"p1" => elem.properties[$properties_names[0]], | |
"p2" => elem.properties[$properties_names[1]], | |
"p3" => elem.properties[$properties_names[2]], | |
"p4" => elem.properties[$properties_names[3]], | |
"p5" => elem.properties[$properties_names[4]], | |
"p6" => elem.properties[$properties_names[5]], | |
"p7" => elem.properties[$properties_names[6]], | |
"p8" => elem.properties[$properties_names[7]] | |
) | |
# pbar.set(actual_num_tree); | |
pbar.inc | |
# puts "Iserted tree #{actual_num_tree} of #{num_trees}" | |
end | |
db.commit | |
db.close | |
pbar.finish | |
end | |
########################################################### | |
# program | |
########################################################### | |
# check if number of arguments is ok | |
unless ARGV.length == 2 | |
puts "Not the right number of arguments." | |
puts "Usage: ruby trees2sqlite.rb gml.xml trees.sqlite.\n" | |
puts "GML file link: http://data.wien.gv.at/daten/wfs?service=WFS&request=GetFeature&version=1.1.0&typeName=ogdwien:BAUMOGD&srsName=EPSG:4326" | |
exit | |
end | |
import_file = ARGV[0] | |
database_path = ARGV[1] | |
########################################################### | |
# parse xml | |
########################################################### | |
trees = parse_trees_xml(import_file) | |
num_trees = trees.size | |
puts "Parsed #{num_trees} trees" | |
#trees.each do |elem| | |
#puts elem.to_s | |
#end | |
########################################################### | |
# insert trees into db | |
########################################################### | |
db = SQLite3::Database.new(database_path) | |
prepare_db(db) | |
insert_trees(db, trees, num_trees) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment