Created
August 2, 2019 21:39
-
-
Save binary1230/38927b36f5b0f757c89740f8bb464fdc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# All of this is terrible; don't use it.
# Purpose: take a contact list from an iPhone iCloud dump and compare it to a HubSpot export.
# If any names aren't similar, write out a CSV file with the names you should import into HubSpot.
#
# Seriously, this is horrible: I wrote it very quickly and there are probably better ways to do this.
import csv
import sys

import pandas as pd
from fuzzywuzzy import fuzz
# A similarity score must be strictly greater than this for two names to be
# treated as the same person.
threshold_for_match = 90

HUBSPOT_EXPORT_PATH = 'data/hubspot-crm-exports-all-contacts.xls'
IPHONE_EXPORT_PATH = 'data/iphone export.xlsx'
OUTPUT_CSV_PATH = 'c:/tmp/output.csv'


def full_name(row):
    """Return the row's 'First Name' and 'Last Name' joined by a space, stripped.

    ``row`` is anything indexable by column name (a dict or a pandas Series).
    The f-string keeps this working when a cell holds a non-string value.
    """
    return f"{row['First Name']} {row['Last Name']}".strip()


def index_by_name(rows):
    """Build ``{full name: {'df': row, 'include': True}}`` from contact rows.

    Rows that share the same full name collapse into one entry (the last one
    wins). A progress dot is printed for each row.
    """
    contacts = {}
    for row in rows:
        contacts[full_name(row)] = {'df': row, 'include': True}
        print('.', end='')
    return contacts


def exclude_known(contacts, known_names, threshold=None, scorer=None):
    """Clear ``'include'`` on every contact whose name resembles a known name.

    Args:
        contacts: mapping produced by ``index_by_name``; modified in place.
        known_names: iterable of full names that already exist in the CRM.
        threshold: a score strictly greater than this counts as a match;
            defaults to the module-level ``threshold_for_match``.
        scorer: callable ``(name_a, name_b) -> number``; defaults to
            ``fuzz.ratio``.

    Empty names on either side are never compared. A progress dot is printed
    for every tenth known name.
    """
    if threshold is None:
        threshold = threshold_for_match
    if scorer is None:
        # Resolved lazily so a caller-supplied scorer works without fuzzywuzzy.
        scorer = fuzz.ratio

    for count, known in enumerate(known_names, start=1):
        if count % 10 == 0:
            print('.', end='')
            sys.stdout.flush()
        if not known:
            continue
        for name, contact in contacts.items():
            # Skip blank names and contacts that an earlier name already excluded.
            if not name or not contact['include']:
                continue
            # No early exit: several contacts may resemble the same known name
            # and all of them must be excluded.
            if scorer(name, known) > threshold:
                contact['include'] = False


def write_csv(path, fieldnames, records):
    """Write ``records`` (a list of dicts) to ``path`` as CSV with a header row.

    The header comes from ``fieldnames`` rather than from the first record, so
    an empty ``records`` list produces a header-only file instead of failing.
    """
    with open(path, 'w', encoding='utf-8-sig', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(records)


def main():
    """Write the iPhone contacts that have no similar name in the HubSpot export."""
    # na_filter=False keeps blank cells as empty strings instead of NaN.
    df_hubspot = pd.read_excel(HUBSPOT_EXPORT_PATH, na_filter=False)
    df_iphone = pd.read_excel(IPHONE_EXPORT_PATH, na_filter=False)

    print('\npreping iphone data')
    contacts = index_by_name(row.to_dict() for _, row in df_iphone.iterrows())

    print('\ncross-checking iphone data')
    exclude_known(contacts, (full_name(row) for _, row in df_hubspot.iterrows()))
    print('\n')

    to_import = [
        contact['df'] for contact in contacts.values() if contact['include']
    ]
    write_csv(OUTPUT_CSV_PATH, list(df_iphone.columns), to_import)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment