Skip to content

Instantly share code, notes, and snippets.

@lucianghinda
Created October 5, 2024 15:31
Show Gist options
  • Save lucianghinda/ffce774f0a0d43a62bf6249c1dee700b to your computer and use it in GitHub Desktop.
Save lucianghinda/ffce774f0a0d43a62bf6249c1dee700b to your computer and use it in GitHub Desktop.
Compare Substack exported subscribers with Beehiiv exported subscribers
require 'csv'
require 'time'
require 'set'
# Finds the rows of one CSV export whose email address does not appear in a
# second CSV export and writes those rows to a timestamped CSV file.
#
# Both files are read with a header row. Rows are matched on the "email"
# column, compared case-insensitively; a missing email is treated as "".
class Diff
  # Rows of the source file that are absent from the second file, together
  # with the source file's header row.
  MissingData = Data.define(:rows, :headers)

  # @param source_file [String] path of the CSV whose rows are checked
  # @param second_file [String] path of the CSV to compare against
  def initialize(source_file:, second_file:)
    @source_file = source_file
    @second_file = second_file
    @output_filename = generate_output_filename
  end

  # Validates the inputs, writes the missing rows to the output file
  # (a relative path, so it lands in the current working directory) and
  # prints a short summary to stdout.
  #
  # @raise [ArgumentError] if either input file does not exist
  def compare
    validate
    missing_data = find_missing_data
    write_output(missing_data)
    report_results(missing_data.rows.size)
  end

  private

  # Ensures BOTH input paths exist before any CSV parsing is attempted, so a
  # bad path surfaces as the ArgumentError the caller rescues rather than as
  # a low-level Errno error from CSV.read.
  def validate
    return if [@source_file, @second_file].all? { File.exist?(_1) }

    raise ArgumentError, "One or both input files do not exist."
  end

  # Collects the source rows whose email is not present in the second file.
  #
  # @return [MissingData]
  def find_missing_data
    source_table = CSV.read(@source_file, headers: true)
    second_table = CSV.read(@second_file, headers: true)

    # A Set gives constant-time membership checks while scanning the source.
    known_emails = Set.new(second_table.map { normalized_email(_1) })
    missing_rows = source_table.reject { known_emails.include?(normalized_email(_1)) }

    MissingData.new(rows: missing_rows, headers: source_table.headers)
  end

  # Lower-cased email of a CSV row; nil (column or value absent) becomes "".
  def normalized_email(row)
    row['email'].to_s.downcase
  end

  # Writes the header row followed by every missing row to the output file.
  def write_output(missing_data)
    CSV.open(@output_filename, "wb") do |csv|
      csv << missing_data.headers
      missing_data.rows.each { csv << _1 }
    end
  end

  # Prints where the output was written and how many rows it contains.
  def report_results(missing_count)
    puts "Lines from the first file not present in the second file have been written to: #{@output_filename}"
    puts "Total missing lines: #{missing_count}"
  end

  # Builds an output name such as "diff_20241005_15_31_00.csv" from the
  # current local time.
  def generate_output_filename
    "diff_#{Time.now.strftime('%Y%m%d_%H_%M_%S')}.csv"
  end
end
# Command-line entry point: expects exactly two CSV paths as arguments.
unless ARGV.size == 2
  puts "Usage: ruby script.rb <first_file.csv> <second_file.csv>"
  exit
end

first_path, second_path = ARGV

begin
  Diff.new(source_file: first_path, second_file: second_path).compare
rescue ArgumentError => error
  # Raised by Diff when an input file is missing; report it and stop.
  puts "Error: #{error.message}"
  exit
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment