Created
January 26, 2013 02:45
-
-
Save istana/4639781 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# -*- coding: UTF-8 -*- | |
# this program should be executed through | |
# rails runner | |
# /usr/bin/time -v ./script/rails runner ../add_locations.rb -y --obce /home/hero/html/OBCE.XLS --ulice /home/hero/html/ULICE.XLS | |
=begin | |
rails new psctest | |
rails g scaffold district name:string code:string | |
rails g scaffold county name:string code:integer district_id:integer shortcuts:text | |
add serialize :shortcuts to model | |
rails g scaffold village name:string zip:string county_id:integer | |
rake db:create && rake db:migrate | |
class CreateDistricts < ActiveRecord::Migration | |
def change | |
create_table :districts do |t| | |
t.string :name, :null => false | |
t.string :code, :null => false | |
t.timestamps | |
end | |
end | |
end | |
class CreateCounties < ActiveRecord::Migration | |
def change | |
create_table :counties do |t| | |
t.string :name, :null => false | |
t.integer :code, :null => false | |
t.integer :district_id, :null => false | |
t.text :shortcuts, :null => false | |
t.timestamps | |
end | |
end | |
end | |
class CreateVillages < ActiveRecord::Migration | |
def change | |
create_table :villages do |t| | |
t.string :name, :null => false | |
t.string :zip, :null => false | |
t.integer :county_id, :null => false | |
t.timestamps | |
end | |
end | |
end | |
=end | |
require 'optparse' | |
require 'iconv' | |
require 'roo' | |
def open_spreadsheet(filename) | |
case File.extname(filename) | |
when ".xls", ".XLS" then Excel.new(filename, nil, :ignore) | |
else raise("Súbor #{filename} musí mať .xls alebo .XLS príponu") | |
end | |
end | |
def verbose(string) | |
if $options[:verbose] == true | |
puts string | |
end | |
end | |
db_krajov = { | |
'BL' => 'Bratislavský kraj', | |
'TA' => 'Trnavský kraj', | |
'TC' => 'Trenčiansky kraj', | |
'NI' => 'Nitriansky kraj', | |
'ZI' => 'Žilinský kraj', | |
'BC' => 'Banskobystrický kraj', | |
'PV' => 'Prešovský kraj', | |
'KI' => 'Košický kraj' | |
} | |
# kod okresu a evidenčné číslo | |
db_evc = { | |
301 => 'BN', | |
601 => ['BB', 'BC', 'BK'], | |
602 => 'BS', | |
701 => 'BJ', | |
101 => ['BA', 'BD', 'BE', 'BI', 'BL', 'BT'], | |
102 => ['BA', 'BD', 'BE', 'BI', 'BL', 'BT'], | |
103 => ['BA', 'BD', 'BE', 'BI', 'BL', 'BT'], | |
104 => ['BA', 'BD', 'BE', 'BI', 'BL', 'BT'], | |
105 => ['BA', 'BD', 'BE', 'BI', 'BL', 'BT'], | |
603 => 'BR', | |
501 => 'BY', | |
502 => 'CA', | |
604 => 'DT', | |
503 => 'DK', | |
201 => 'DS', | |
202 => 'GA', | |
801 => 'GL', | |
203 => 'HC', | |
702 => 'HE', | |
302 => 'IL', | |
703 => 'KK', | |
401 => 'KN', | |
802 => ['KE', 'KC', 'KI'], | |
803 => ['KE', 'KC', 'KI'], | |
804 => ['KE', 'KC', 'KI'], | |
805 => ['KE', 'KC', 'KI'], | |
806 => 'KS', | |
605 => 'KA', | |
504 => 'KM', | |
402 => 'LV', | |
704 => 'LE', | |
505 => 'LM', | |
606 => 'LC', | |
106 => 'MA', | |
506 => 'MT', | |
705 => 'ML', | |
807 => 'MI', | |
303 => 'MY', | |
507 => 'NO', | |
403 => ['NR', 'NI', 'NT'], | |
304 => 'NM', | |
404 => 'NZ', | |
305 => 'PE', | |
107 => 'PK', | |
204 => 'PN', | |
607 => 'PT', | |
706 => 'PP', | |
306 => 'PB', | |
707 => ['PO', 'PV', 'PS'], | |
307 => 'PD', | |
308 => 'PU', | |
608 => 'RA', | |
609 => 'RS', | |
808 => 'RV', | |
508 => 'RK', | |
708 => 'SB', | |
108 => 'SC', | |
205 => 'SE', | |
206 => 'SI', | |
709 => 'SV', | |
809 => 'SO', | |
810 => 'SN', | |
710 => 'SL', | |
711 => 'SP', | |
712 => 'SK', | |
405 => 'SA', | |
406 => 'TO', | |
811 => 'TV', | |
309 => ['TN', 'TC', 'TE'], | |
207 => ['TT', 'TA', 'TB'], | |
509 => 'TR', | |
510 => 'TS', | |
610 => 'VK', | |
713 => 'VT', | |
407 => 'ZM', | |
611 => 'ZV', | |
612 => 'ZC', | |
613 => 'ZH', | |
511 => ['ZA', 'ZI', 'ZL'] | |
} | |
$options = {} | |
$options[:yes] = false | |
$options[:verbose] = false | |
$options[:obce] = 'OBCE.XLS' | |
$options[:ulice] = 'ULICE.XLS' | |
opts_parser = OptionParser.new do |opts| | |
opts.banner = "Program na pridanie PSČ, obcí, okresov a skratiek krajov\n" + | |
"z XLS súborov OBCE.XLS a ULICE.XLS z http://posta.sk do databázy\n" + | |
"verzia 1/2013\n\n" | |
"Použitie: pridaj_lokality [options]" | |
opts.on("-y", "--yes", "Naozaj spusti") do |y| | |
$options[:yes] = y | |
end | |
opts.on("-v", "--[no-]verbose", "Spusti ukecané") do |v| | |
$options[:verbose] = v | |
end | |
opts.on("--obce SUBOR", "XLS súbor s obcami") do |subor| | |
$options[:obce] = subor | |
end | |
opts.on("--ulice SUBOR", "XLS súbor s ulicami") do |subor| | |
$options[:ulice] = subor | |
end | |
opts.on_tail("-h", "--help", "Ukáž túto správu") do | |
$stderr.puts opts | |
exit | |
end | |
end | |
opts_parser.parse! | |
if $options[:yes] == false | |
$stderr.puts "\nProgram NESPUSTENÝ" | |
$stderr.puts opts_parser.help | |
exit | |
end | |
vsetky_psc, vsetky_obce, vsetky_okresy, vsetky_kraje = [], [], [], [] | |
# pomocna premenna pre druhy subor, data na premostenie | |
ulice_pomocnik = {} | |
####### OBCE.XLS | |
puts "+ Spracúvam OBCE" | |
xls_obce = open_spreadsheet($options[:obce]) | |
hlavicka = xls_obce.row(1) | |
(2..xls_obce.last_row).each do |i| | |
riadok = Hash[[hlavicka, xls_obce.row(i)].transpose] | |
nazov_obce = riadok["DOBEC"] | |
nazov_okresu = riadok["OKRES"] | |
psc = riadok["PSC"].to_s.gsub(" ", "") | |
kod_kraju = riadok["KRAJ"] | |
kod_okresu = riadok["KOD_OKR"].to_i | |
# Kraj | |
kraje = District.where(:code => kod_kraju) | |
if kraje.size >= 2 | |
raise("Existuje viac krajov s rovnakým kódom") | |
elsif kraje.empty? | |
nazov = db_krajov[kod_kraju] | |
raise('Kód kraja je neznámy') if nazov.nil? | |
kraj = District.create!(:code => kod_kraju, :name => nazov) | |
else | |
kraj = kraje.first | |
end | |
# Okres | |
okresy = County.where(:code => kod_okresu) | |
if okresy.size >= 2 | |
raise("Existuje viac okresov s rovnakým kódom") | |
elsif okresy.empty? | |
okres = County.create!(:code => kod_okresu, :name => nazov_okresu, | |
:shortcuts => db_evc[kod_okresu], :district_id => kraj.id) | |
else | |
okres = okresy.first | |
end | |
# Obec | |
# jednotlivé pošty a PSČ sú v druhom súbore ULICE.XLS | |
# treba si zapamätať k mestu okres | |
# tieto mestá nemajú dvojníkov | |
if psc.blank? | |
ulice_pomocnik[nazov_obce] = okres.id | |
verbose("PSČ je prázdne pri obci #{nazov_obce} na riadku #{i}") | |
else | |
obce = Village.where(:name => nazov_obce, :county_id => okres.id) | |
if obce.size >= 2 | |
raise("Existuje viac obci s rovnakým nazvom a okresom") | |
elsif obce.empty? | |
obec = Village.create!(:name => nazov_obce, :zip => psc, :county_id => okres.id) | |
verbose("Vytvorená obec: " + obec.name + " " + obec.zip) | |
else | |
obec = obce.first | |
verbose("Nájdená obec: " + obec.name + " " + obec.zip) | |
end | |
end | |
#puts riadok.inspect | |
vsetky_obce << [nazov_obce, psc] | |
vsetky_okresy << kod_okresu | |
vsetky_kraje << kod_kraju | |
if i%100 == 0 | |
vsetky_obce.uniq! | |
vsetky_okresy.uniq! | |
vsetky_kraje.uniq! | |
puts "---> Je už spracovaných #{i} miest" | |
end | |
end | |
# ULICE | |
puts "+ Spracúvam ULICE" | |
xls_ulice = open_spreadsheet($options[:ulice]) | |
hlavicka = xls_ulice.row(1) | |
(2..xls_ulice.last_row).each do |i| | |
riadok = Hash[[hlavicka, xls_ulice.row(i)].transpose] | |
psc = riadok["PSC"].to_s.gsub(" ", "") | |
nazov_obce = riadok["OBCE"] | |
next if psc.blank? || nazov_obce.blank? | |
# Vrútky majú uvedené PSČ 038 61 | |
# Martin zas 036 01 | |
# | |
# v ULICIACH zas majú ulice poštu Vrútky, | |
# ale obec Martin | |
# | |
next if nazov_obce == 'Martin' | |
obce = Village.where(:name => nazov_obce, :zip => psc) | |
if obce.size >= 2 | |
raise("Existuje viac obci s rovnakým názvom a PSČ!") | |
elsif obce.empty? | |
okres_id = ulice_pomocnik[nazov_obce] | |
if okres_id.blank? | |
puts "Okres je PRÁZDNY" | |
puts "Obec:" + nazov_obce | |
puts ulice_pomocnik.inspect | |
end | |
obec = Village.create!(:name => nazov_obce, :zip => psc, :county_id => okres_id) | |
verbose("Vytvorená obec: " + obec.name + " " + obec.zip) | |
else | |
obec = obce.first | |
verbose("Nájdená obec: " + obec.name + " " + obec.zip) | |
end | |
vsetky_obce << [nazov_obce, psc] | |
if i%100 == 0 | |
vsetky_obce.uniq! | |
puts "---> Je už spracovaných #{i} ulíc" | |
end | |
end | |
puts "+ Zistujem stare zaznamy, ktore v novom subore neexistuju" | |
vsetky_obce.uniq! | |
vsetky_okresy.uniq! | |
vsetky_kraje.uniq! | |
navyse = District.where('code NOT IN (?)', vsetky_kraje) | |
pocet = navyse.size | |
puts "V databáze je naviac #{pocet} starych krajov." | |
puts navyse.inspect | |
navyse = County.where('code NOT IN (?)', vsetky_okresy) | |
pocet = navyse.size | |
puts "V databáze je naviac #{pocet} starych okresov." | |
puts navyse.inspect | |
dopyt = {:names => [], :zips => []} | |
vsetky_obce.each do |obec| | |
dopyt[:names] << obec[0] | |
dopyt[:zips] << obec[1] | |
end | |
dopyt[:names].uniq! | |
dopyt[:zips].uniq! | |
#dopyt = dopyt.join(" AND ") | |
navyse = Village.where('name NOT IN (?) AND zip NOT IN (?)', dopyt[:names], dopyt[:zips]) | |
pocet = navyse.size | |
puts "V databáze je naviac #{pocet} starych miest." | |
puts navyse.inspect | |
puts "KONIEC" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment