Skip to content

Instantly share code, notes, and snippets.

@ismasan
Created January 16, 2025 11:46
Parse and validate loans CSV using Plumb
# Adapted from https://gist.github.com/thedumbtechguy/9e6d9abfbd0393804f185118196ea678
require 'csv'
require 'plumb'
require 'date'
require 'debug'
require 'active_support/core_ext/string'
# Plumb type definitions used to parse and validate the rows of a loans CSV.
module Types
  include Plumb::Types

  # A string that holds a number. Any commas are removed before the string
  # is converted with #to_f.
  NumericString = Types::String.transform(Float) { |raw| raw.to_s.delete(',').to_f }

  # The date columns below use Forms::Date, see
  # https://github.com/ismasan/plumb/blob/main/lib/plumb/types.rb#L225
  #
  # If only the dd/mm/yyyy format should be accepted, you could instead match
  # against a regexp first:
  #
  #   DateString = String[/(\d{2})\/(\d{2})\/(\d{4})/].build(::Date, :parse)
  #
  # or parse with an explicit format:
  #
  #   DateString = Types::String.build(::Date) do |str|
  #     ::Date.strptime(str, '%d/%m/%Y')
  #   rescue Date::Error
  #     nil
  #   end

  # The shape of a single loan record.
  #
  # Hash[**schema] is used here rather than Hash#schema(**schema). The two are
  # aliases, but #[] is the consistent idiom for any kind of sub-type
  # definition (#schema will probably be made private in Plumb).
  LoanRecord = Types::Hash[
    loan_no: Lax::String.present,
    rgn: NumericString,
    account_name: Types::String,
    principal_bal: NumericString,
    interest_bal: NumericString,
    total: NumericString,
    disbursed: Forms::Date,
    end_date: Forms::Date,
    int_rate: NumericString,
    tenure: NumericString,
    current_inst: NumericString,
    rem_inst: NumericString,
    prinpal_pyt_mth: NumericString,
    int_pyt_mth: NumericString,
    total_pyt_mth: NumericString,
    cagd_payment: NumericString,
    disbursed_amount: NumericString,
    acc_int: NumericString
  ]

  # A normalized CSV header: #parameterize and #underscore are invoked on the
  # header string and the result is turned into a Symbol.
  NormalizedHeader = String.invoke(%i[parameterize underscore]).transform(::Symbol, &:to_sym)

  # A CSV row whose keys (the CSV headers) are normalized; values are left
  # untouched.
  NormalizedCSVRow = Hash[NormalizedHeader, Any]

  # A loan record with normalized keys.
  #
  # This is written as a pipeline so that it is easy to add debugging steps in
  # between, for example:
  #
  #   NormalizedLoanRecord = Any.pipeline do |pl|
  #     pl.step NormalizedCSVRow
  #     pl.step do |r|
  #       debugger
  #       r
  #     end
  #     pl.step LoanRecord
  #   end
  #
  # The steps can also simply be piped together with >>:
  #
  #   NormalizedLoanRecord = NormalizedCSVRow >> LoanRecord
  NormalizedLoanRecord = Any.pipeline do |pipe|
    # 1. Make sure CSV rows are turned into hashes.
    pipe.step Any.transform(::Hash, &:to_h)
    # 2. Normalize the keys.
    pipe.step NormalizedCSVRow
    # 3. Map and validate into loan records.
    pipe.step LoanRecord
  end

  # Turns the CSV enumerator into a Plumb stream that can be iterated over.
  # Rows are parsed and validated as they come in.
  LoanStream = Types::Stream[NormalizedLoanRecord]
end
# Reads the loans CSV at +filepath+ and sorts every row into valid or invalid.
#
# The file is read with headers enabled and the resulting row enumerator is
# parsed through Types::LoanStream, which yields one result per row.
#
# @param filepath [String] path of the CSV file to read
# @return [Hash] with the keys
#   :valid         - the values of all valid results,
#   :invalid       - one hash per rejected row (:loan_no, :row, :errors),
#   :total_count   - number of rows seen,
#   :valid_count   - number of valid rows,
#   :invalid_count - number of invalid rows
def process_loans(filepath)
  accepted = []
  rejected = []

  # Enumerator over the CSV rows
  rows = CSV.foreach(filepath, headers: true)

  Types::LoanStream.parse(rows).each do |outcome|
    if outcome.valid?
      accepted.push(outcome.value)
      next
    end

    # Keep the offending row and its errors so they can be reported later.
    payload = outcome.value
    rejected.push({ loan_no: payload[:loan_no], row: payload, errors: outcome.errors })
  end

  accepted_total = accepted.length
  rejected_total = rejected.length

  {
    valid: accepted,
    invalid: rejected,
    total_count: accepted_total + rejected_total,
    valid_count: accepted_total,
    invalid_count: rejected_total
  }
end
# Run the import on loans.csv and print a report of the outcome.
results = process_loans('loans.csv')

# Headline counts
puts "\nProcessing Summary:",
     "Total records: #{results[:total_count]}",
     "Valid records: #{results[:valid_count]}",
     "Invalid records: #{results[:invalid_count]}"

# One entry per row that passed validation
accepted = results[:valid]
if accepted.any?
  puts "\nValid Records Details:"
  accepted.each do |loan|
    puts "\nLoan No. #{loan[:loan_no]}"
    puts "Row: #{loan}"
  end
end

# One entry per rejected row, with the keys that were present and the errors
rejected = results[:invalid]
if rejected.any?
  puts "\nInvalid Records Details:"
  rejected.each do |failure|
    puts "\nLoan No. #{failure[:loan_no]}"
    puts "Available keys: #{failure[:row].keys.join(', ')}"
    puts "Errors: #{failure[:errors]}"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment