binary1230 · August 2, 2019 21:39
diff --git a/gistfile1.txt b/gistfile1.txt
 # all of this is terrible, don't use it. 
 # purpose: take a contact list exported from an iPhone (iCloud dump) and compare it to a Hubspot export.
 # any iPhone contact whose name has no close match in Hubspot is written to a CSV file you can import into Hubspot.
 #
 # seriously, this is horrible I wrote it very quickly there are probably better ways to do this.

 import pandas as pd
 from fuzzywuzzy import fuzz
 import sys

 # Two names are treated as the same person when fuzz.ratio() scores them
 # strictly above this value.
 threshold_for_match = 90

 # na_filter=False turns off pandas' missing-value detection so the string
 # concatenation of the name columns below does not hit NaN values.
 # NOTE(review): assumes 'First Name' / 'Last Name' cells are always text;
 # a numeric cell would make the '+' below raise TypeError -- confirm.
 df_hubspot = pd.read_excel('data/hubspot-crm-exports-all-contacts.xls', na_filter=False)
 df_iphone = pd.read_excel('data/iphone export.xlsx', na_filter=False)

 # Stripped iPhone full name -> {'df': source row, 'include': export flag}.
 # Rows sharing the same stripped full name overwrite each other, so only
 # the last such row is kept.
 contacts = {}

 print('\npreping iphone data')
 for index, row in df_iphone.iterrows():
    fullname = row['First Name'] + ' ' + row['Last Name']
    contacts[fullname.strip()] = {'df': row, 'include': True}
    print('.', end='')

 print('\ncross-checking iphone data')
 for row_number, (index, row) in enumerate(df_hubspot.iterrows(), start=1):
    fullname = (row['First Name'] + ' ' + row['Last Name']).strip()

    # Progress indicator: one dot per ten Hubspot rows.
    if row_number % 10 == 0:
        print('.', end='')
        sys.stdout.flush()

    # A blank Hubspot name cannot match anything; skip the whole scan.
    if not fullname:
        continue

    for name, contact in contacts.items():
        # Blank iPhone names are never compared (so they stay included),
        # and contacts already excluded need no further scoring.
        if not name or not contact['include']:
            continue

        # No early exit here: every iPhone contact close to this Hubspot
        # name must be excluded, not only the first one encountered.
        # Stopping at the first hit could leave an exact match in the
        # export when a near-miss spelling came earlier in the dict.
        if fuzz.ratio(name, fullname) > threshold_for_match:
            contact['include'] = False

 print('\n')

 # Rows of every iPhone contact that had no close Hubspot match.
 dfs = [contact['df'].to_dict() for contact in contacts.values() if contact['include']]

 import csv

 # Use the sheet's columns for the header instead of dfs[0].keys(), which
 # raised IndexError when there was nothing left to export. The keys of each
 # row dict are these same column labels.
 keys = list(df_iphone.columns)
 with open('c:/tmp/output.csv', 'w', encoding='utf-8-sig', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(dfs)
	# all of this is terrible, don't use it.
	# purpose: take a contact list exported from an iPhone (iCloud dump) and compare it to a Hubspot export.
	# any iPhone contact whose name has no close match in Hubspot is written to a CSV file you can import into Hubspot.
	#
	# seriously, this is horrible I wrote it very quickly there are probably better ways to do this.

	import pandas as pd
	from fuzzywuzzy import fuzz
	import sys

	# Two names are treated as the same person when fuzz.ratio() scores them
	# strictly above this value.
	threshold_for_match = 90

	# na_filter=False turns off pandas' missing-value detection so the string
	# concatenation of the name columns below does not hit NaN values.
	# NOTE(review): assumes 'First Name' / 'Last Name' cells are always text;
	# a numeric cell would make the '+' below raise TypeError -- confirm.
	df_hubspot = pd.read_excel('data/hubspot-crm-exports-all-contacts.xls', na_filter=False)
	df_iphone = pd.read_excel('data/iphone export.xlsx', na_filter=False)

	# Stripped iPhone full name -> {'df': source row, 'include': export flag}.
	# Rows sharing the same stripped full name overwrite each other, so only
	# the last such row is kept.
	contacts = {}

	print('\npreping iphone data')
	for index, row in df_iphone.iterrows():
	    fullname = row['First Name'] + ' ' + row['Last Name']
	    contacts[fullname.strip()] = {'df': row, 'include': True}
	    print('.', end='')

	print('\ncross-checking iphone data')
	for row_number, (index, row) in enumerate(df_hubspot.iterrows(), start=1):
	    fullname = (row['First Name'] + ' ' + row['Last Name']).strip()

	    # Progress indicator: one dot per ten Hubspot rows.
	    if row_number % 10 == 0:
	        print('.', end='')
	        sys.stdout.flush()

	    # A blank Hubspot name cannot match anything; skip the whole scan.
	    if not fullname:
	        continue

	    for name, contact in contacts.items():
	        # Blank iPhone names are never compared (so they stay included),
	        # and contacts already excluded need no further scoring.
	        if not name or not contact['include']:
	            continue

	        # No early exit here: every iPhone contact close to this Hubspot
	        # name must be excluded, not only the first one encountered.
	        # Stopping at the first hit could leave an exact match in the
	        # export when a near-miss spelling came earlier in the dict.
	        if fuzz.ratio(name, fullname) > threshold_for_match:
	            contact['include'] = False

	print('\n')

	# Rows of every iPhone contact that had no close Hubspot match.
	dfs = [contact['df'].to_dict() for contact in contacts.values() if contact['include']]

	import csv

	# Use the sheet's columns for the header instead of dfs[0].keys(), which
	# raised IndexError when there was nothing left to export. The keys of each
	# row dict are these same column labels.
	keys = list(df_iphone.columns)
	with open('c:/tmp/output.csv', 'w', encoding='utf-8-sig', newline='') as output_file:
	    dict_writer = csv.DictWriter(output_file, keys)
	    dict_writer.writeheader()
	    dict_writer.writerows(dfs)