Skip to content

Instantly share code, notes, and snippets.

@ebot
Created September 20, 2012 16:12
Show Gist options
  • Save ebot/3756844 to your computer and use it in GitHub Desktop.
Save ebot/3756844 to your computer and use it in GitHub Desktop.
Compares Iron Mountain's actual documents with what their CSVs say should be there.
#!/usr/bin/env ruby -wKU
require 'faster_csv'
# Pass 1: tally, per encounter number, what the CSV files in the current
# directory claim should exist.
puts "Reading the Iron Mountain CSVs"
# EncNo => { :docs      => number of CSV rows for that encounter,
#            :doc_types => DocType value of every such row,
#            :files     => basenames of the CSVs that mention it }
encounters = Hash.new
error_text = ''
enc_no = '' # Include information from a specific encounter
lines = 0   # CSV rows with a non-nil EncNo, summed over every file
Dir.glob( '*.csv' ).each do |file|
  csv_name = File.basename(file)
  # Physical line number inside this file; line 1 is the header row.
  # Assumes one record per line (no newlines embedded in quoted fields).
  line_no = 1
  FasterCSV.foreach(file, :headers => :first_row) do |row|
    line_no += 1
    enc = row['EncNo']
    next if enc.nil?

    lines += 1
    info = (encounters[enc] ||= { :docs => 0,
                                  :doc_types => Array.new,
                                  :files => Array.new })
    info[:docs] += 1
    info[:doc_types] << row['DocType']
    info[:files] << csv_name unless info[:files].include? csv_name
    if enc == '' && row['OrgId'] != ''
      # Report the position within this file rather than the running total
      # kept in `lines`, which spans all files.
      error_text << "Empty encounter on line #{line_no} in #{csv_name}\n"
    end
  end
end
# The collected problems were previously never shown; send them to stderr so
# the report on stdout keeps its existing shape.
warn error_text unless error_text.empty?
# Pass 2: count the directories actually present, grouped by the part of the
# directory name before the first underscore, and (when enc_no is set) read
# the document types out of any non-directory entry whose path contains it.
puts "Reading the directories in input"
dirs = 0
# name prefix => number of directories; prefixes never seen stay absent (nil).
dir_encounters = Hash.new
Dir.glob( './*' ).each do |file|
  if File.directory? file
    dirs += 1
    encounter = File.basename(file).split( '_' )[0]
    dir_encounters[encounter] = dir_encounters.fetch(encounter, 0) + 1
  elsif enc_no != '' && file.include?(enc_no) && encounters.key?(enc_no)
    # The key? guard avoids a NoMethodError when the requested encounter does
    # not appear in any CSV.
    info = encounters[enc_no]
    info[:csv] = File.basename file
    info[:idx_doc_types] = []
    page_num = ''
    # File.foreach closes the file when iteration finishes.
    File.foreach(file) do |line|
      value = line.split(':')[1]
      next if value.nil? # no "key: value" shape on this line
      value = value.strip
      # Remember the most recent FILE value so that a DOCUMENT TYPE entry is
      # only recorded when it belongs to '1.tif'.
      page_num = value if line.include? 'FILE'
      if line.include?('DOCUMENT TYPE') && page_num == '1.tif'
        info[:idx_doc_types] << value
      end
    end
  end
end
# Pass 3: print the totals, then every encounter whose CSV row count differs
# from its directory count. The encounter selected by enc_no is also printed
# when its counts agree, followed by a document-type comparison.
puts "Total Documents = #{lines} - #{dirs}"
encounters.each do |encounter, info|
  mismatch = info[:docs] != dir_encounters[encounter]
  next unless mismatch || encounter == enc_no

  puts " #{encounter} - #{info[:docs]} - #{dir_encounters[encounter]}"
  info[:files].each { |file| puts " #{file}" }
  # A mismatching encounter only gets the summary lines above.
  next if mismatch

  # :idx_doc_types is absent when no index file was read for this encounter.
  idx_doc_types = info[:idx_doc_types] || []
  # Sort on the string form so a nil DocType (empty CSV field) cannot make the
  # comparison raise; the printed text is the same either way.
  puts " CSV Doc Types - #{info[:doc_types].count}:"
  info[:doc_types].sort_by { |doc_type| doc_type.to_s }.each { |doc_type| puts " #{doc_type}" }
  puts " Idx Doc Types - #{idx_doc_types.count}:"
  idx_doc_types.sort_by { |doc_type| doc_type.to_s }.each { |doc_type| puts " #{doc_type}" }
  unmatched = info[:doc_types] - idx_doc_types
  puts " Unmatched Doc Types = #{unmatched.count}"
  unmatched.each { |dt| puts " #{dt}" }
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment