Created
April 21, 2024 22:21
-
-
Save klebervirgilio/1623b2b5dcf3b99e4a1333a1d2cb3f2e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'tempfile'
require 'bundler/inline'
require 'date'

# Inline Gemfile: declares the gems this script needs so it can be run as a
# single file without a separate Gemfile.
gemfile do
  source 'https://rubygems.org'
  ruby '3.3.0'
  gem 'pdftotext', require: true
  gem 'pry', require: true
end
# Matches the text "APEX" anywhere in a document, ignoring case.
# Used to recognise notes issued by the Apex broker.
APEX_PATTERN = /APEX/i

# Matches the text "DriveWealth" anywhere in a document, ignoring case.
# Used to recognise notes issued by the DriveWealth broker.
DRIVEWEALTH_PATTERN = /DriveWealth/i
# Every PDF in the current working directory, as [file_name, extracted_text]
# pairs. The text comes from Pdftotext.text (pdftotext gem).
notas = Dir.glob('*.pdf').map do |file|
  [file, Pdftotext.text(file)]
end
# Parses a month/day/4-digit-year string such as "04/21/2024".
#
# `%Od` is the alternative-numerals spelling of `%d`; Date.strptime reads it
# as a plain day-of-month.
#
# @param date [String] date text in mm/dd/YYYY form
# @return [Date]
# @raise [Date::Error] when the text does not match the format
def date_parser(date) = Date.strptime(date, '%m/%Od/%Y')
# Parses a month/day/2-digit-year string such as "04/21/24".
#
# `%Od` is the alternative-numerals spelling of `%d`; Date.strptime reads it
# as a plain day-of-month. `%y` is the two-digit year.
#
# @param date [String] date text in mm/dd/yy form
# @return [Date]
# @raise [Date::Error] when the text does not match the format
def apex_date_parser(date) = Date.strptime(date, '%m/%Od/%y')
# Extracts trade rows from the text of an Apex note.
#
# A row is any line that starts with a digit and does not contain
# 'Paul Street 1300' (presumably an address line that also starts with a
# number - confirm against a real note). Each row is split on whitespace and
# fields 2..12 are read positionally; the two fields bound to `_` are ignored.
#
# @param nota [String] full text of the note
# @return [Array<Hash>] one hash per row; dates are Date objects, every other
#   value is the raw String token, and :line holds the whole split row
def extract_from_apex(nota)
  trade_rows = nota.split("\n")
                   .grep(/\A\d/)
                   .reject { _1.include?('Paul Street 1300') }
                   .map(&:split)

  trade_rows.map do |line|
    trade_date, settle_date, qty, ticker, price, principal, _, transaction_fee, fees, _, net_amount = line[2..12]

    {
      trade_date: apex_date_parser(trade_date),
      settle_date: apex_date_parser(settle_date),
      qty:,
      ticker:,
      price:,
      principal:,
      transaction_fee:,
      fees:,
      net_amount:,
      line:
    }
  end
end
# Extracts trades from the text of a DriveWealth note.
#
# Every line containing 'Agent' is treated as a trade line. The amounts are
# taken from the last whitespace-separated token of the lines 2 to 8 below it
# (offsets 0, 2, 3 and 6 of that window); the trade line itself is split into
# columns on runs of six or more whitespace characters.
# NOTE(review): all offsets are positional and assume a fixed layout - confirm
# against a real note.
#
# @param nota [String] full text of the note
# @return [Array<Hash>] one hash per trade; dates are Date objects, every
#   other value is the raw String token, and :line holds the split columns
def extract_from_drivewealth(nota)
  nota_lines = nota.split("\n")

  nota_lines.each_with_index.select { |text, _| text.include?('Agent') }.map do |text, index|
    amounts = nota_lines[(index + 2)..(index + 8)].map { _1.split(/\s+/).last }
    principal = amounts[0]
    fees = amounts[2]
    transaction_fee = amounts[3]
    net_amount = amounts[6]

    line = text.split(/\s{6,}/).map(&:strip)
    ticker = line[0]
    qty = line[4]

    # The second-to-last column holds either "trade settle" (price is then its
    # own column) or "price trade settle" fused together.
    dates_and_price = line[-2].split(/\s+/)
    if dates_and_price.size == 2
      trade_date, settle_date = dates_and_price
      price = line[5]
    else
      price, trade_date, settle_date = dates_and_price
    end

    {
      trade_date: date_parser(trade_date),
      settle_date: date_parser(settle_date),
      qty:,
      ticker:,
      price:,
      principal:,
      fees:,
      transaction_fee:,
      net_amount:,
      line:
    }
  end
end
# Flat list of trade hashes from every note, each tagged with its source file.
# The Apex pattern is checked first, so a note matching both patterns is
# parsed as Apex. A note matching neither pattern aborts the run.
result = notas.flat_map do |(file, nota)|
  if nota.match?(APEX_PATTERN)
    puts ">>>>> Apex: #{file}"
    extract_from_apex(nota).map { _1.merge(file:) }
  elsif nota.match?(DRIVEWEALTH_PATTERN)
    puts ">>> DriveWealth: #{file}"
    extract_from_drivewealth(nota).map { _1.merge(file:) }
  else
    raise "Unknown broker for file #{file} with content: #{nota}"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment