Created
September 20, 2012 16:12
-
-
Save ebot/3756844 to your computer and use it in GitHub Desktop.
Compares Iron Mountain's actual documents with what their CSVs say should be there.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby -wKU
#
# Compares the document directories Iron Mountain delivered with the document
# counts listed in their CSV manifests, and prints every encounter whose two
# counts disagree.
#
# Run it from the directory that holds the *.csv manifests and the document
# directories (named "<EncNo>_..."). Pass an encounter number as the first
# argument to also print a doc-type breakdown for that encounter, read from
# the index file whose name contains that number.

# FasterCSV became the standard CSV library in Ruby 1.9, so the gem is only
# needed (and only usable) on 1.8.
if RUBY_VERSION < '1.9'
  require 'faster_csv'
  CSV_PARSER = FasterCSV
else
  require 'csv'
  CSV_PARSER = CSV
end

# True when a CSV field is missing (nil) or an empty string.
def blank_field?(value)
  value.nil? || value == ''
end

# Reads every *.csv in +dir+ and tallies documents per encounter.
#
# Returns [encounters, lines, errors]:
#   encounters - { EncNo => { :docs => row count, :doc_types => [DocType, ...],
#                             :files => [csv base names] } }
#   lines      - number of rows whose EncNo field was not nil
#   errors     - one message per row that has an OrgId but no EncNo
def read_csv_encounters(dir = '.')
  encounters = {}
  errors = []
  lines = 0
  Dir.glob(File.join(dir, '*.csv')).sort.each do |file|
    name = File.basename(file)
    line_no = 1 # the header row; assumes no field spans several lines
    CSV_PARSER.foreach(file, :headers => :first_row) do |row|
      line_no += 1
      enc = row['EncNo']
      # Unquoted empty fields parse as nil, so check before skipping nil rows.
      if blank_field?(enc) && !blank_field?(row['OrgId'])
        errors << "Empty encounter on line #{line_no} in #{name}"
      end
      next if enc.nil?
      lines += 1
      info = (encounters[enc] ||= { :docs => 0, :doc_types => [], :files => [] })
      info[:docs] += 1
      # to_s keeps a missing DocType from breaking the sort in the report.
      info[:doc_types] << row['DocType'].to_s
      info[:files] << name unless info[:files].include?(name)
    end
  end
  [encounters, lines, errors]
end

# Counts the directories directly under +dir+, grouped by the text before the
# first underscore in each directory name (the encounter number).
#
# Returns [dir_encounters, dirs]: the per-encounter counts and the total.
def count_encounter_dirs(dir = '.')
  dir_encounters = {}
  dirs = 0
  Dir.glob(File.join(dir, '*')).sort.each do |path|
    next unless File.directory?(path)
    dirs += 1
    encounter = File.basename(path).split('_')[0]
    dir_encounters[encounter] = (dir_encounters[encounter] || 0) + 1
  end
  [dir_encounters, dirs]
end

# Returns the stripped text between the first and second colon of +line+, or
# '' when the line has no colon.
def index_field_value(line)
  line.split(':')[1].to_s.strip
end

# Collects the DOCUMENT TYPE values that an index file lists while the most
# recent FILE line named '1.tif'.
def read_index_doc_types(path)
  doc_types = []
  page_num = ''
  # File.foreach closes the file when the block finishes.
  File.foreach(path) do |line|
    page_num = index_field_value(line) if line.include?('FILE')
    next unless line.include?('DOCUMENT TYPE')
    doc_types << index_field_value(line) if page_num == '1.tif'
  end
  doc_types
end

# Returns the doc types from the index file for +enc_no+ in +dir+: the last
# (in sorted order) non-directory entry whose base name contains +enc_no+.
# Returns [] when +enc_no+ is empty or nothing matches.
def index_doc_types_for(enc_no, dir = '.')
  return [] if enc_no == ''
  matches = Dir.glob(File.join(dir, '*')).sort.select do |path|
    !File.directory?(path) && File.basename(path).include?(enc_no)
  end
  matches.empty? ? [] : read_index_doc_types(matches.last)
end

# Builds the report as an array of output lines: the totals, then every
# encounter whose CSV row count differs from its directory count, and, for
# +enc_no+ when its counts agree, a breakdown of CSV vs. index doc types.
def build_report(encounters, lines, dir_encounters, dirs, enc_no = '', idx_doc_types = [])
  report = ["Total Documents = #{lines} - #{dirs}"]
  encounters.each do |encounter, info|
    dir_count = dir_encounters[encounter] # nil when no directory exists
    mismatch = info[:docs] != dir_count
    next unless mismatch || encounter == enc_no
    report << " #{encounter} - #{info[:docs]} - #{dir_count}"
    info[:files].each { |file| report << " #{file}" }
    next if mismatch
    report << " CSV Doc Types - #{info[:doc_types].count}:"
    info[:doc_types].sort.each { |doc_type| report << " #{doc_type}" }
    report << " Idx Doc Types - #{idx_doc_types.count}:"
    idx_doc_types.sort.each { |doc_type| report << " #{doc_type}" }
    unmatched = info[:doc_types] - idx_doc_types
    report << " Unmatched Doc Types = #{unmatched.count}"
    unmatched.each { |doc_type| report << " #{doc_type}" }
  end
  report
end

# Runs the comparison for +dir+ and prints the report to stdout; rows with an
# OrgId but no EncNo are listed on stderr.
def main(enc_no = ARGV[0] || '', dir = '.')
  puts "Reading the Iron Mountain CSVs"
  encounters, lines, errors = read_csv_encounters(dir)
  puts "Reading the directories in input"
  dir_encounters, dirs = count_encounter_dirs(dir)
  idx_doc_types = index_doc_types_for(enc_no, dir)
  puts build_report(encounters, lines, dir_encounters, dirs, enc_no, idx_doc_types)
  $stderr.puts errors unless errors.empty?
end

main if __FILE__ == $0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment