# Parse and validate loans CSV using Plumb
# Gist created January 16, 2025 11:46
# Adapted from https://gist.github.com/thedumbtechguy/9e6d9abfbd0393804f185118196ea678
require 'csv'
require 'plumb'
require 'date'
require 'debug'
require 'active_support/core_ext/string'
# Plumb type definitions used to parse and validate rows of the loans CSV.
module Types
  include Plumb::Types

  # A string holding a number, optionally with comma separators
  # (e.g. "1,234.50"); commas are removed and the rest is read with #to_f.
  # NOTE(review): String#to_f is lenient -- non-numeric text becomes 0.0
  # instead of a validation error. Confirm that is acceptable for this data.
  NumericString = Types::String.transform(Float) do |str|
    str.to_s.delete(',').to_f
  end

  # Here perhaps you can use Types::Forms::Date
  # https://github.com/ismasan/plumb/blob/main/lib/plumb/types.rb#L225
  # Or, if you really only want dd/mm/yyyy format, you could match against a regexp
  # DateString = String[/(\d{2})\/(\d{2})\/(\d{4})/].build(::Date, :parse)
  #
  # DateString = Types::String.build(::Date) do |str|
  #   ::Date.strptime(str, '%d/%m/%Y')
  # rescue Date::Error
  #   nil
  # end

  # Here you can use Hash[**schema] instead of Hash#schema(**schema)
  # They're aliases, but #[] is the consistent idiom I'm going for for any kind of sub-type definition.
  # I probably need to make #schema private.
  #
  # Schema for one loan row. It is applied after header normalization (see
  # NormalizedLoanRecord below), so the keys here are the normalized,
  # symbolized CSV headers.
  LoanRecord = Types::Hash[
    loan_no: Lax::String.present,
    rgn: NumericString,
    account_name: Types::String,
    principal_bal: NumericString,
    interest_bal: NumericString,
    total: NumericString,
    disbursed: Forms::Date,
    end_date: Forms::Date,
    int_rate: NumericString,
    tenure: NumericString,
    current_inst: NumericString,
    rem_inst: NumericString,
    prinpal_pyt_mth: NumericString,
    int_pyt_mth: NumericString,
    total_pyt_mth: NumericString,
    cagd_payment: NumericString,
    disbursed_amount: NumericString,
    acc_int: NumericString
  ]

  # A normalized CSV header: #parameterize and #underscore are invoked on the
  # header string in turn, and the result is converted to a Symbol.
  NormalizedHeader = String
                     .invoke(%i[parameterize underscore])
                     .transform(::Symbol, &:to_sym)

  # A CSV row that normalizes keys (CSV headers)
  NormalizedCSVRow = Hash[NormalizedHeader, Any]

  # A loan record with normalized keys
  # I'm making this into a pipeline so that it's easy to add debugging steps in between
  # Ex.
  # NormalizedLoanRecord = Any.pipeline do |pl|
  #   pl.step NormalizedCSVRow
  #   pl.step do |r|
  #     debugger
  #     r
  #   end
  #   pl.step LoanRecord
  # end
  #
  # But you can also just pipe it together with >>
  # NormalizedLoanRecord = NormalizedCSVRow >> LoanRecord
  NormalizedLoanRecord = Any.pipeline do |pl|
    # Make sure to transform CSV rows into hashes
    pl.step Any.transform(::Hash, &:to_h)
    # Normalize keys
    pl.step NormalizedCSVRow
    # Map and validate to Loan records
    pl.step LoanRecord
  end

  # Turn the CSV enumerator into a Plumb stream that can be iterated over.
  # It will parse and validate rows as they come in.
  LoanStream = Types::Stream[NormalizedLoanRecord]
end
# Reads the loans CSV at +filepath+, runs every row through
# Types::LoanStream and splits the outcomes into valid and invalid records.
#
# @param filepath [String] path to a CSV file whose first line holds the headers
# @return [Hash] with the keys
#   :valid         - values of the results that passed validation
#   :invalid       - one hash per failed result: { loan_no:, row:, errors: }
#   :total_count   - number of rows processed
#   :valid_count   - number of valid rows
#   :invalid_count - number of invalid rows
def process_loans(filepath)
  # Without a block, CSV.foreach returns an enumerator over the rows.
  rows = CSV.foreach(filepath, headers: true)

  # Parse the CSV stream into results, then split them by validity.
  passed, failed = Types::LoanStream.parse(rows).partition(&:valid?)

  valid_loans = passed.map(&:value)
  invalid_loans = failed.map do |result|
    {
      loan_no: result.value[:loan_no],
      row: result.value,
      errors: result.errors
    }
  end

  {
    valid: valid_loans,
    invalid: invalid_loans,
    total_count: valid_loans.length + invalid_loans.length,
    valid_count: valid_loans.length,
    invalid_count: invalid_loans.length
  }
end
# Process and analyze the data
results = process_loans('loans.csv')

# Overall counts
puts "\nProcessing Summary:"
puts "Total records: #{results[:total_count]}"
puts "Valid records: #{results[:valid_count]}"
puts "Invalid records: #{results[:invalid_count]}"

# One entry per record that passed validation
if results[:valid].any?
  puts "\nValid Records Details:"
  results[:valid].each do |valid|
    puts "\nLoan No. #{valid[:loan_no]}"
    puts "Row: #{valid}"
  end
end

# One entry per record that failed validation, with its keys and errors
if results[:invalid].any?
  puts "\nInvalid Records Details:"
  results[:invalid].each do |invalid|
    puts "\nLoan No. #{invalid[:loan_no]}"
    puts "Available keys: #{invalid[:row].keys.join(', ')}"
    puts "Errors: #{invalid[:errors]}"
  end
end
# End of gist (GitHub page footer removed).