Created
October 5, 2024 15:31
-
-
Save lucianghinda/ffce774f0a0d43a62bf6249c1dee700b to your computer and use it in GitHub Desktop.
Compare Substack exported subscribers with Beehiiv exported subscribers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv' | |
require 'time' | |
require 'set' | |
# Compares two CSV exports by their `email` column (case-insensitively) and
# writes every row of the first file whose email is absent from the second
# file to a timestamped CSV in the current working directory.
class Diff
  # Result of a comparison: the source rows missing from the second file,
  # together with the source file's header row.
  MissingData = Data.define(:rows, :headers)

  # @param source_file [String] path of the CSV whose rows are reported
  # @param second_file [String] path of the CSV the source is compared against
  def initialize(source_file:, second_file:)
    @source_file = source_file
    @second_file = second_file
    @output_filename = generate_output_filename
  end

  # Validates the inputs, writes the missing rows to the output file and
  # prints a short summary to stdout.
  #
  # @raise [ArgumentError] if either input file does not exist
  def compare
    validate
    missing_data = find_missing_data
    write_output(missing_data)
    report_results(missing_data.rows.size)
  end

  private

  # Ensures BOTH input paths exist before any CSV parsing is attempted.
  def validate
    return if [@source_file, @second_file].all? { File.exist?(_1) }

    raise ArgumentError, "One or both input files do not exist."
  end

  # Collects the rows of the source file whose normalised email does not
  # occur anywhere in the second file.
  #
  # @return [MissingData]
  def find_missing_data
    first_data = CSV.read(@source_file, headers: true)
    second_data = CSV.read(@second_file, headers: true)

    second_emails = Set.new(second_data.map { normalized_email(_1) })
    # A source row is "missing" exactly when its email is not in the second set.
    missing_lines = first_data.reject { second_emails.include?(normalized_email(_1)) }

    MissingData.new(rows: missing_lines, headers: first_data.headers)
  end

  # Lower-cased email of a CSV row; a missing/blank cell becomes "".
  def normalized_email(row)
    row['email'].to_s.downcase
  end

  # Writes the header row followed by every missing row to the output file.
  def write_output(missing_data)
    CSV.open(@output_filename, "wb") do |csv|
      csv << missing_data.headers
      missing_data.rows.each { csv << _1 }
    end
  end

  def report_results(missing_count)
    puts "Lines from the first file not present in the second file have been written to: #{@output_filename}"
    puts "Total missing lines: #{missing_count}"
  end

  # Output name embeds the creation time so repeated runs do not overwrite
  # each other (down to one-second resolution).
  def generate_output_filename
    "diff_#{Time.now.strftime('%Y%m%d_%H_%M_%S')}.csv"
  end
end
# Command-line entry point: expects exactly two CSV paths and reports the rows
# of the first file that are missing from the second.
# Failure paths write to stderr and exit with a non-zero status so that shell
# callers can detect the error.
if ARGV.length != 2
  warn "Usage: ruby script.rb <first_file.csv> <second_file.csv>"
  exit 1
end

begin
  diff = Diff.new(source_file: ARGV[0], second_file: ARGV[1])
  diff.compare
rescue ArgumentError => e
  warn "Error: #{e.message}"
  exit 1
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment