Last active
August 29, 2015 14:16
-
-
Save fikriauliya/59b19b2ce8b08fa46644 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Re-joins wrapped lines of raw_rapbd.txt and writes the result to stdout.
#
# Every input line is stripped. A line starts a new output line when it
# begins with a "d.dd" account-code prefix or matches one of EXCEPTIONS;
# any other line is appended to the current output line after one space.
# NOTE(review): the input is presumably text pasted from a PDF, where long
# rows wrap across lines - confirm against the data source.
contents = File.read('raw_rapbd.txt')

# Patterns for non-record lines that must nevertheless start a new output line
# (section headers, totals and blank lines).
EXCEPTIONS = [/^Urusan Pemerintahan :/, /^Organisasi :/, /^Jumlah/, /^$/, /^Surplus\/\(Defisit\)/, /^Pembiayaan Netto/]

contents.each_line do |raw_line|
  text = raw_line.strip
  if /^\d\.\d\d/.match(text) || EXCEPTIONS.any? { |pattern| pattern.match(text) }
    # Record start or recognised header: begin a fresh output line.
    print("\n#{text}")
  else
    # Continuation of the previous record: glue it on with a single space.
    print(" #{text}")
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Converts raw_rapbd_2.txt into pipe-separated rows on stdout.
#
# "Urusan Pemerintahan : ..." and "Organisasi : ..." lines update the current
# context; every line containing an account code is emitted as one row that
# carries that context, the code, the description and (when present) the
# trailing amount. Lines matching none of these are ignored.
contents = File.read('raw_rapbd_2.txt')
puts "Urusan Pemerintahan|Organisasi|Kode Rekening|Uraian|Jumlah"

current_urusan = nil
current_organisasi = nil

contents.each_line do |raw_line|
  next if raw_line.strip.empty?

  # Context headers are matched against the unstripped line.
  header = /^Urusan Pemerintahan : (.+)/.match(raw_line)
  if header
    current_urusan = header[1]
    next
  end

  header = /^Organisasi : (.+)/.match(raw_line)
  if header
    current_organisasi = header[1]
    next
  end

  # Account code: "d.dd ddd dd ddd" plus up to three single-digit levels,
  # followed by the rest of the line.
  row = /(\d\.\d{2} \d{3} \d{2} \d{3} (\d )?(\d )?(\d )?)(.+)/.match(raw_line.strip)
  next unless row

  kode_rekening = row[1].strip
  remaining = row[5]

  # The amount is the trailing run of digits and dots ending in three digits;
  # everything before it is the description.
  amount = /(.+?)([\.\d]+\d{3})$/.match(remaining)
  if amount
    uraian = amount[1].strip
    jumlah = amount[2].strip
    puts "#{current_urusan}|#{current_organisasi}|#{kode_rekening}|#{uraian}|#{jumlah}"
  else
    # No amount found: keep the whole remainder as description, amount empty.
    puts "#{current_urusan}|#{current_organisasi}|#{kode_rekening}|#{remaining}|"
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Formats a whole number with "." as the thousands separator,
# e.g. 1234567 -> "1.234.567".
#
# The sign is split off before grouping so that a leading "-" is never
# treated as a digit (previously -123 came out as "-.123").
#
# @param number [Integer, #to_s] value whose decimal representation is grouped
# @return [String] grouped digits, prefixed with "-" for negative input
def separate_with_dot(number)
  text = number.to_s
  negative = text.start_with?('-')
  digits = negative ? text[1..-1] : text
  # Group from the right: reverse, slice in threes, join, reverse back.
  grouped = digits.chars.reverse.each_slice(3).map(&:join).join('.').reverse
  negative ? "-#{grouped}" : grouped
end
# Aggregates the pipe-separated rows of raw_rapbd_3.txt into one summary row
# per top-level account code and prints those summaries to stdout.
#
# The part of "Kode Rekening" after the first 15 characters decides the level
# of a row: length <= 1 is a top-level row, length 3 a group row, length 5 a
# child row. Group amounts are summed into the top-level total; child amounts
# are summed separately and compared with the preceding group amount when the
# next group row arrives.
#
# Based on the original algorithm by Arief (pseudo-code, translated):
#
#   if (last_kode_rekening.length() <= 1) {
#     print(kode_rekening + uraian + total);
#     total = 0;
#   }
#   if (last_kode_rekening.length() == 3) {
#     if (last_jumlah != 0) {
#       if (last_jumlah != total_child)
#         -> not equal
#       last_jumlah = 0;
#     }
#     total += jumlah;
#     last_jumlah = jumlah;
#     total_child = 0;
#   }
#   if (last_kode_rekening.length() == 5) {
#     total_child += jumlah;
#   }
contents = File.read('raw_rapbd_3.txt')
puts "Urusan Pemerintahan|Organisasi|Kode Rekening|Uraian|Belanja Barang dan Jasa|Belanja Modal|Belanja Pegawai|Total"

# Running sums for the three tracked spending categories, keyed by the exact
# description text that identifies them.
spending = { 'BELANJA BARANG DAN JASA' => 0, 'BELANJA MODAL' => 0, 'BELANJA PEGAWAI' => 0 }
activity_total = 0 # sum of group-row amounts under the pending top-level row
group_amount = 0   # amount of the most recent group row
children_sum = 0   # sum of child-row amounts since the most recent group row
pending = nil      # [urusan, organisasi, kode, uraian] of the last top-level row

# Prints the summary row for the pending top-level row using the current sums.
print_pending = lambda do
  prev_urusan, prev_organisasi, prev_kode, prev_uraian = pending
  puts "#{prev_urusan}|#{prev_organisasi}|#{prev_kode}|#{prev_uraian}|#{separate_with_dot(spending['BELANJA BARANG DAN JASA'])}|#{separate_with_dot(spending['BELANJA MODAL'])}|#{separate_with_dot(spending['BELANJA PEGAWAI'])}|#{separate_with_dot(activity_total)}"
end

contents.each_line.with_index do |line, index|
  urusan_pemerintahan, organisasi, kode_rekening, uraian, jumlah = line.split("|")
  amount = jumlah.gsub(/\./, '').to_i
  next if index.zero? # the first line is the column header

  level = kode_rekening[15..-1].strip.length
  top_level = level <= 1

  # Category sums are updated before the level is examined.
  spending[uraian] += amount if spending.key?(uraian)

  if top_level && pending
    # A new top-level row closes the previous one: print it and reset sums.
    print_pending.call
    activity_total = group_amount = 0
    spending.keys.each { |category| spending[category] = 0 }
  elsif level == 3
    # Validate the previous group against the children seen since then.
    if group_amount != 0 && group_amount != children_sum
      puts "Error! #{group_amount} != #{children_sum}"
    end
    activity_total += amount
    group_amount = amount
    children_sum = 0
  elsif level == 5
    children_sum += amount
  end

  pending = [urusan_pemerintahan, organisasi, kode_rekening, uraian] if top_level
end

# Flush the last top-level row, which has no successor to trigger its output.
print_pending.call if pending
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Formats a number with "," as the thousands separator,
# e.g. 1234567 -> "1,234,567" and -1234 -> "-1,234".
#
# @param number [Integer] value to format (must respond to #abs and #<)
# @return [String] the grouped absolute value, prefixed with "-" when negative
def separate_with_comma(number)
  # Group from the right: reverse the digits, cut into chunks of up to three,
  # join with commas and reverse back.
  grouped = number.abs.to_s.reverse.scan(/.{1,3}/m).join(",").reverse
  number < 0 ? "-#{grouped}" : grouped
end
# Load the DPRD file and create the three lookup tables filled further below:
# pagu by activity code, by activity name, and by SKPD code + code + name.
dprd_version = File.read('join_apbd.csv')
kode_kegiatan_to_pagu = {}
nama_kegiatan_to_pagu = {}
kode_nama_kegiatan_to_pagu = {}
# kode_kegiatan_to_line = Hash.new | |
# dprd_version.each_line do |l| | |
# _, kode_skpd, nama_skpd, komisi, kode_kegiatan, nama_kegiatan, pagu, tambah, kurang, _, _, _, hasil_pembahasan = /(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|/.match(l).to_a | |
# unless kode_kegiatan.strip.empty? | |
# k = [kode_skpd, nama_skpd, komisi, kode_kegiatan].join("_") | |
# if kode_kegiatan_to_line.key?(k) | |
# if kode_kegiatan_to_line[k]['name'] == nama_kegiatan then print("* ") end | |
# puts "Duplicate keys: #{kode_skpd}|#{nama_skpd}|#{komisi}|#{kode_kegiatan} -> #{kode_kegiatan_to_line[k]['name']} (#{kode_kegiatan_to_line[k]['jumlah']}) vs #{nama_kegiatan} (#{hasil_pembahasan})" | |
# else | |
# kode_kegiatan_to_line[k] = Hash.new | |
# kode_kegiatan_to_line[k]['name'] = nama_kegiatan | |
# kode_kegiatan_to_line[k]['jumlah'] = hasil_pembahasan | |
# end | |
# end | |
# end | |
# Walks every line of the DPRD file, reports keys that occur more than once
# and records the pagu value under each key (a later line overwrites an
# earlier one).
dprd_version.each_line do |row|
  # Capture positions: 1 = kode_skpd, 2 = nama_skpd, 3 = komisi,
  # 4 = kode_kegiatan, 5 = nama_kegiatan, 6 = pagu, 7 = tambah, 8 = kurang,
  # 12 = hasil_pembahasan. Only the ones needed below are pulled out.
  fields = /(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|/.match(row).to_a
  kode_skpd = fields[1]
  kode_kegiatan = fields[4]
  nama_kegiatan = fields[5]
  pagu = fields[6]

  has_kode = !kode_kegiatan.strip.empty?
  has_nama = !nama_kegiatan.strip.empty?

  if has_kode && has_nama
    combined_key = [kode_skpd, kode_kegiatan, nama_kegiatan].join("_")
    if kode_nama_kegiatan_to_pagu.key?(combined_key)
      puts "Duplicate kode & nama: #{combined_key} -> #{pagu} vs #{kode_nama_kegiatan_to_pagu[combined_key]}"
    end
    kode_nama_kegiatan_to_pagu[combined_key] = pagu
  end

  if has_kode
    puts "Duplicate kode: #{kode_kegiatan}" if kode_kegiatan_to_pagu.key?(kode_kegiatan)
    kode_kegiatan_to_pagu[kode_kegiatan] = pagu
  end

  if has_nama
    puts "Duplicate nama: #{nama_kegiatan}" if nama_kegiatan_to_pagu.key?(nama_kegiatan)
    nama_kegiatan_to_pagu[nama_kegiatan] = pagu
  end
end
# pemda_version = File.read('raw_rapbd_4.txt') | |
# puts("Urusan Pemerintahan|Organisasi|Kode Rekening|Uraian|Jumlah|Pagu_DPRD|Flag") | |
# first_line = true | |
# pemda_version.each_line do |l| | |
# if first_line then | |
# first_line = false | |
# else | |
# _, urusan_pemerintahan, organisasi, kode_rekening, uraian, jumlah = /(.*)\|(.*)\|(.*)\|(.*)\|(.*)/.match(l).to_a | |
# # 1.01 001 01 121 -> 1.01.01.001.121 | |
# translatted_kode_rekening = kode_rekening[0..3] + "." + kode_rekening[9..10] + "." + kode_rekening[5..7] + "." + kode_rekening[12..-1] | |
# pagu = nil | |
# pagu ||= kode_nama_kegiatan_to_pagu[[translatted_kode_rekening, uraian].join("_")] | |
# pagu ||= kode_kegiatan_to_pagu[translatted_kode_rekening] | |
# # pagu ||= nama_kegiatan_to_pagu[uraian] | |
# jumlah.gsub!(/\./, ',') | |
# if pagu | |
# if pagu == jumlah | |
# flag = 0 | |
# else | |
# flag = separate_with_comma(jumlah.gsub(/\,/, '').to_i - pagu.gsub(/\,/, '').to_i) | |
# end | |
# else | |
# flag = "Doesn't exist in DPRD" | |
# end | |
# print "#{urusan_pemerintahan}|#{organisasi}|#{kode_rekening}|#{uraian}|#{jumlah}|#{pagu}|#{flag}\n" | |
# end | |
# end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This code needs refactoring | |
require 'fuzzy_match' | |
require 'set' | |
require 'amatch' | |
# Formats a whole number with "," as the thousands separator,
# e.g. "1234567" -> "1,234,567" and "-1234" -> "-1,234".
#
# The argument is converted with #to_s first, so Integers and nil are accepted
# as well as Strings (previously they raised NoMethodError on #empty?).
# The text is then parsed with String#to_i, so non-numeric text formats as "0".
#
# @param number [String, Integer, nil] digits to format, optionally signed
# @return [String, Integer] the grouped number as a String; the Integer 0 when
#   the input is empty or nil (kept for backward compatibility - it renders
#   as "0" when interpolated or joined)
def separate_with_comma(number)
  text = number.to_s
  return 0 if text.empty?

  value = text.to_i
  # Group from the right: reverse the digits, slice in threes, join, reverse.
  grouped = value.abs.to_s.chars.reverse.each_slice(3).map(&:join).join(",").reverse
  value < 0 ? "-#{grouped}" : grouped
end
# Index mata.anggaran.csv (skipping its first line) three ways: by account
# code + lower-cased program name, by account code alone, and by lower-cased
# program name alone. Each index maps its key to the list of totals seen,
# stored as digit strings with "," and "." removed.
# (The original also carried disabled duplicate-key diagnostics here.)
pemda_version = File.read('mata.anggaran.csv')
kode_rekening_program_to_total = {}
kode_rekening_to_total = {}
program_to_total = {}
all_programs = Set.new

pemda_version.each_line.drop(1).each do |row|
  # Capture positions: 1 = urusan_pemerintahan, 2 = organisasi, 3 = kategori,
  # 4 = kode_rekening, 5 = program, 9 = total.
  fields = /(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)/.match(row).to_a
  kode_rekening = fields[4]
  total = fields[9].to_s.gsub(/\,/, "").gsub(/\./, "")
  program_key = fields[5].downcase
  combined_key = [kode_rekening, program_key].join("_")

  (kode_rekening_program_to_total[combined_key] ||= []) << total
  (kode_rekening_to_total[kode_rekening] ||= []) << total
  (program_to_total[program_key] ||= []) << total
  all_programs.add(program_key)
end
# fm = FuzzyMatch.new(all_programs.to_a) | |
# Compares every row of join_apbd.csv (after its first line) with the totals
# indexed above and prints one pipe-separated report row per input row.
#
# Lookup order: account code + name, then account code, then name. When all
# three fail, the most similar known program name (by levenshtein_similar) is
# reported together with its totals as a suggestion.
dprd_version = File.read('join_apbd.csv')
puts "no|kode_skpd|nama_skpd|komisi|kode_kegiatan|nama_kegiatan|pagu|tambah|kurang|hasil_pembahasan|hasil_pemprov_version|delta = hasil_pembahasan - hasil_pemprov_version|flag|kegiatan_versi_pemprov_yang_mirip|% kemiripan|hasil_pemprov_version_yang_mirip|delta = hasil_pemprov_version_yang_mirip - hasil_pemprov_version"

# Removes "," and "." separators from an amount string.
strip_separators = ->(text) { text.gsub(/\,/, "").gsub(/\./, "") }

dprd_version.each_line.drop(1).each.with_index(1) do |row, number|
  _, kode_skpd, nama_skpd, komisi, kode_kegiatan, nama_kegiatan, pagu, tambah, kurang, hasil_pembahasan = /(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|/.match(row).to_a

  # Reorder the code segments: 1.01.01.001.121 -> 1.01 001 01 121
  translated_kode =
    if kode_kegiatan.empty?
      ""
    else
      kode_kegiatan[0..3] + " " + kode_kegiatan[8..10] + " " + kode_kegiatan[5..6] + " " + kode_kegiatan[12..-1]
    end
  name_key = nama_kegiatan.downcase

  # Try the most specific key first and fall back step by step.
  flag = "Not found in Pemprov version based on kode & nama kegiatan"
  total = kode_rekening_program_to_total[[translated_kode, name_key].join("_")]
  if total
    flag = "Perfect match"
  elsif (total = kode_rekening_to_total[translated_kode])
    flag = "Matched by kode kegiatan"
  elsif (total = program_to_total[name_key])
    flag = "Matched by nama kegiatan"
  end

  pagu = separate_with_comma(strip_separators.call(pagu))
  tambah = separate_with_comma(strip_separators.call(tambah))
  kurang = separate_with_comma(strip_separators.call(kurang))
  raw_hasil = strip_separators.call(hasil_pembahasan)
  original_hasil_pembahasan = raw_hasil.to_i
  hasil_pembahasan = separate_with_comma(raw_hasil)

  recommended_nama_kegiatan = nil
  recommended_total = nil
  recommended_score = nil
  recommended_delta = nil
  delta = nil

  if total.nil?
    # No match at all: pick the known program with the highest similarity
    # score; on ties the earliest one wins.
    recommended_score = -1
    all_programs.each do |candidate|
      score = candidate.levenshtein_similar(name_key)
      next unless recommended_score < score

      recommended_score = score
      recommended_nama_kegiatan = candidate
    end

    variants = program_to_total[recommended_nama_kegiatan]
    # A delta is only meaningful when the suggestion has exactly one total.
    if variants.length == 1
      recommended_delta = separate_with_comma((original_hasil_pembahasan - variants[0].to_i).to_s)
    end
    recommended_total =
      if variants.length > 3
        "Too many variants"
      else
        variants.map { |e| separate_with_comma(e) }.join("/")
      end

    puts "#{number}|#{kode_skpd}|#{nama_skpd}|#{komisi}|#{kode_kegiatan}|#{nama_kegiatan}|#{pagu}|#{tambah}|#{kurang}|#{hasil_pembahasan}||#{hasil_pembahasan}|#{flag}|#{recommended_nama_kegiatan}|#{recommended_score}|#{recommended_total}|#{recommended_delta}"
  else
    # A delta is only meaningful when exactly one total was found.
    if total.length == 1
      delta = separate_with_comma((original_hasil_pembahasan - total[0].to_i).to_s)
    end
    total =
      if total.length > 3
        "Too many variants"
      else
        total.map { |e| separate_with_comma(e) }.join("/")
      end

    puts "#{number}|#{kode_skpd}|#{nama_skpd}|#{komisi}|#{kode_kegiatan}|#{nama_kegiatan}|#{pagu}|#{tambah}|#{kurang}|#{hasil_pembahasan}|#{total}|#{delta}|#{flag}|#{recommended_nama_kegiatan}|#{recommended_score}|#{recommended_total}|#{recommended_delta}"
  end
end
raw_rapbd_2.txt adalah output dari 0_rapbd_cleaner.rb; raw_rapbd_3.txt adalah output dari 1_rapbd_to_csv.rb.
raw_rapbd.txt dari copy paste PDF di sini www.jakarta.go.id/v2/news/2015/03/rapbd-pemprov-dki-dan-dprd#.VPLDDCwxs7w -> https://drive.google.com/file/d/0B9EJd8nA4QE8R0s1ellncWtPcmc/view?usp=sharing
terimakasih mas.. tak cobai melihat dengan cara yang berbeda
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
file raw_rapbd.txt, raw_rapbd_2.txt dapat dari mana ya mas. pengen nyobain