Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save klebervirgilio/1623b2b5dcf3b99e4a1333a1d2cb3f2e to your computer and use it in GitHub Desktop.
Save klebervirgilio/1623b2b5dcf3b99e4a1333a1d2cb3f2e to your computer and use it in GitHub Desktop.
require 'tempfile'
require 'bundler/inline'
require 'date'
gemfile do
source 'https://rubygems.org'
ruby '3.3.0'
gem 'pdftotext', require: true
gem 'pry', require: true
end
# Case-insensitive marker used to recognise a note issued by Apex.
APEX_PATTERN = /APEX/i
# Case-insensitive marker used to recognise a note issued by DriveWealth.
DRIVEWEALTH_PATTERN = /DriveWealth/i

# One [file name, extracted text] pair for every PDF found in the current
# working directory.
notas = Dir['*.pdf'].map { |pdf_path| [pdf_path, Pdftotext.text(pdf_path)] }
# Parses a month/day/four-digit-year string (e.g. "01/31/2024") into a Date.
#
# @param date [String] text in '%m/%Od/%Y' layout
# @return [Date]
# @raise [ArgumentError] (Date::Error) when the text does not match the layout
def date_parser(date)
  Date.strptime(date, '%m/%Od/%Y')
end
# Parses a month/day/two-digit-year string (e.g. "01/31/24") into a Date.
#
# @param date [String] text in '%m/%Od/%y' layout
# @return [Date]
# @raise [ArgumentError] (Date::Error) when the text does not match the layout
def apex_date_parser(date)
  Date.strptime(date, '%m/%Od/%y')
end
# Extracts one hash per trade row from the text of an Apex note.
#
# A row is any line that starts with a digit and does not contain
# 'Paul Street 1300' (presumably an address line that also starts with a
# digit -- confirm against a real note). Each row is split on whitespace and
# eleven columns are read starting at the third token; the 7th and 10th of
# those columns are ignored.
#
# @param nota [String] full text of the note
# @return [Array<Hash>] hashes with :trade_date and :settle_date as Date,
#   the remaining values as the raw String tokens, and :line holding all
#   tokens of the row
def extract_from_apex(nota)
  trade_rows = nota.split("\n").grep(/\A\d/).reject { |row| row.include?('Paul Street 1300') }

  trade_rows.map do |row|
    fields = row.split
    trade_date, settle_date, qty, ticker, price, principal, _, transaction_fee, fees, _, net_amount = fields[2..12]

    {
      trade_date: apex_date_parser(trade_date),
      settle_date: apex_date_parser(settle_date),
      qty: qty,
      ticker: ticker,
      price: price,
      principal: principal,
      transaction_fee: transaction_fee,
      fees: fees,
      net_amount: net_amount,
      line: fields
    }
  end
end
# Extracts one hash per trade from the text of a DriveWealth note.
#
# Every line containing 'Agent' is treated as a trade line. Its columns are
# obtained by splitting on runs of six or more whitespace characters; the
# monetary amounts are taken from the last token of each of the seven lines
# located two to eight lines below it.
# NOTE(review): the column positions and the seven-line offset mirror the
# layout this script was written against -- confirm with a real note.
#
# @param nota [String] full text of the note
# @return [Array<Hash>] hashes with :trade_date and :settle_date as Date,
#   the remaining values as raw String tokens, and :line holding the stripped
#   columns of the trade line
# @raise [NoMethodError] when a trade line sits so close to the end of the
#   text that the amount lines are missing, or has fewer than two columns
def extract_from_drivewealth(nota)
  nota_lines = nota.split("\n")

  nota_lines.each_with_index.filter_map do |raw_line, index|
    next unless raw_line.include?('Agent')

    # Last whitespace-separated token of each line in the amounts section.
    amounts = nota_lines[index + 2..index + 8].map { |text| text.split(/\s+/).last }
    principal = amounts[0]
    fees = amounts[2]
    transaction_fee = amounts[3]
    net_amount = amounts[6]

    columns = raw_line.split(/\s{6,}/).map(&:strip)
    ticker = columns[0]
    qty = columns[4]

    # The second-to-last column holds either "trade settle" (price is then
    # its own column) or "price trade settle" when the price got merged in.
    dates_and_price = columns[-2].split(/\s+/)
    if dates_and_price.size == 2
      trade_date, settle_date = dates_and_price
      price = columns[5]
    else
      price, trade_date, settle_date = dates_and_price
    end

    {
      trade_date: date_parser(trade_date),
      settle_date: date_parser(settle_date),
      qty: qty,
      ticker: ticker,
      price: price,
      principal: principal,
      fees: fees,
      transaction_fee: transaction_fee,
      net_amount: net_amount,
      line: columns
    }
  end
end
# Runs every loaded note through the extractor of the broker it matches and
# tags each extracted row with the name of the file it came from. Apex is
# tested first; a note matching neither pattern aborts the run.
result = notas.flat_map do |(file, nota)|
  rows =
    case nota
    when APEX_PATTERN
      puts ">>>>> Apex: #{file}"
      extract_from_apex(nota)
    when DRIVEWEALTH_PATTERN
      puts ">>> DriveWealth: #{file}"
      extract_from_drivewealth(nota)
    else
      raise "Unknown broker for file #{file} with content: #{nota}"
    end

  rows.map { |row| row.merge(file:) }
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment