Skip to content

Instantly share code, notes, and snippets.

@binary1230
Created August 2, 2019 21:39
Show Gist options
  • Save binary1230/38927b36f5b0f757c89740f8bb464fdc to your computer and use it in GitHub Desktop.
Save binary1230/38927b36f5b0f757c89740f8bb464fdc to your computer and use it in GitHub Desktop.
# All of this is terrible; don't use it.
# Purpose: take a contact list from an iPhone iCloud dump and compare it to a HubSpot export.
# Any iPhone contact whose name is not similar to a HubSpot contact's name is written to a
# CSV file of names you should import into HubSpot.
#
# Seriously, this is horrible. I wrote it very quickly; there are probably better ways to do this.
import pandas as pd
from fuzzywuzzy import fuzz
import sys
import csv

# A pair of names whose similarity score is strictly greater than this value is
# treated as the same person (fuzz.ratio scores range from 0 to 100).
threshold_for_match = 90

HUBSPOT_EXPORT_PATH = 'data/hubspot-crm-exports-all-contacts.xls'
IPHONE_EXPORT_PATH = 'data/iphone export.xlsx'
OUTPUT_CSV_PATH = 'c:/tmp/output.csv'


def full_name(row):
    """Return the whitespace-stripped "First Last" name of a contact row.

    ``row`` is any mapping with 'First Name' and 'Last Name' keys. Cells are
    passed through ``str()`` so a numeric cell does not raise ``TypeError``.
    """
    return (str(row['First Name']) + ' ' + str(row['Last Name'])).strip()


def index_iphone_contacts(df_iphone):
    """Build the ``{full name: {'df': row, 'include': True}}`` lookup.

    Prints one progress dot per row. Rows that share a full name collapse into
    a single entry (the last such row wins), as dict keys are unique.
    """
    contacts = {}
    for _, row in df_iphone.iterrows():
        contacts[full_name(row)] = {'df': row, 'include': True}
        print('.', end='')
    return contacts


def mark_existing_contacts(contacts, hubspot_names,
                           threshold=threshold_for_match, scorer=None):
    """Set ``include`` to False on every contact that resembles a HubSpot name.

    Args:
        contacts: mapping built by ``index_iphone_contacts``; updated in place.
        hubspot_names: iterable of already-stripped HubSpot full names.
        threshold: a score must be strictly greater than this to count as a match.
        scorer: ``callable(a, b) -> number``; defaults to ``fuzz.ratio``.

    Blank names on either side are never compared, so contacts without a name
    always stay included. Prints a progress dot for every tenth HubSpot name.
    """
    if scorer is None:
        scorer = fuzz.ratio
    for count, hubspot_name in enumerate(hubspot_names, start=1):
        if count % 10 == 0:
            print('.', end='', flush=True)
        if not hubspot_name:
            continue
        for name, contact in contacts.items():
            # Contacts that are already excluded need no further scoring.
            if not name or not contact['include']:
                continue
            if scorer(name, hubspot_name) > threshold:
                # Deliberately no `break`: several iPhone contacts may all
                # resemble this one HubSpot name and each must be excluded.
                contact['include'] = False


def write_contacts_csv(rows, fieldnames, path=OUTPUT_CSV_PATH):
    """Write ``rows`` (a list of dicts) to ``path`` as a CSV with a header row.

    The header comes from ``fieldnames`` rather than from the first row, so an
    empty ``rows`` list yields a header-only file instead of an ``IndexError``.
    The file is written as UTF-8 with a byte-order mark ('utf-8-sig').
    """
    with open(path, 'w', encoding='utf-8-sig', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(rows)


def main():
    """Compare the iPhone export to the HubSpot export and write the new contacts."""
    # na_filter=False keeps empty cells as '' instead of NaN.
    df_hubspot = pd.read_excel(HUBSPOT_EXPORT_PATH, na_filter=False)
    df_iphone = pd.read_excel(IPHONE_EXPORT_PATH, na_filter=False)

    print('\npreping iphone data')
    contacts = index_iphone_contacts(df_iphone)

    print('\ncross-checking iphone data')
    hubspot_names = (full_name(row) for _, row in df_hubspot.iterrows())
    mark_existing_contacts(contacts, hubspot_names)
    print('\n')

    rows = [contact['df'].to_dict()
            for contact in contacts.values() if contact['include']]
    write_contacts_csv(rows, list(df_iphone.columns))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment