Last active
July 18, 2020 16:57
-
-
Save frankchen07/b9d9eb78ff923c15986c2c60707c4ee7 to your computer and use it in GitHub Desktop.
contact_script.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
pd.set_option('display.max_rows', None) | |
''' | |
cleans google contacts data | |
input: | |
contacts csv | |
output: | |
cleaned contacts csv | |
[first_name, last_name, phone, email] | |
''' | |
def clean_google_contacts(gdata): | |
# select & rename | |
gdata = gdata[['Given Name', 'Family Name', 'E-mail 1 - Value', 'Phone 1 - Value']] | |
gdata = gdata.rename( | |
columns={ | |
'Given Name': 'first_name', | |
'Family Name': 'last_name', | |
'E-mail 1 - Value': 'email', | |
'Phone 1 - Value': 'phone' | |
} | |
) | |
return gdata | |
''' | |
joins current relationships sheet with google contacts | |
input: | |
relationships csv | |
[first_name, last_name, phone, email] | |
cleaned contacts csv | |
[first_name, last_name, phone, email] | |
output: | |
final csv coalesced and de-duplicated | |
[first_name, last_name, phone, email] | |
''' | |
def contacts_join(r, c): | |
# remove companies & establishments | |
r = r[(r['first_name'].notnull()) & (r['last_name'].notnull())] | |
c = c[(c['first_name'].notnull()) & (c['last_name'].notnull())] | |
# combine | |
f = r.merge(c, on=['first_name', 'last_name'], how='outer') | |
# coalesce information | |
f['email'] = f['email_y'].combine_first(f['email_x']) | |
f['phone'] = f['phone_y'].combine_first(f['phone_x']) | |
# select rows | |
f = f[['first_name', 'last_name', 'email', 'phone']] | |
# de-duplicate, but should do this in Google | |
f = f.drop_duplicates() | |
# save | |
f.to_csv('connection_fox_upload.csv', index=False) | |
print('saved final csv') | |
return f.head() | |
if __name__ == '__main__': | |
r = pd.read_csv('relationships.csv') | |
g = pd.read_csv('contacts.csv') | |
print(g.columns) # file structure of google contacts export | |
''' | |
['Name', 'Given Name', 'Additional Name', 'Family Name', 'Yomi Name', | |
'Given Name Yomi', 'Additional Name Yomi', 'Family Name Yomi', | |
'Name Prefix', 'Name Suffix', 'Initials', 'Nickname', 'Short Name', | |
'Maiden Name', 'Birthday', 'Gender', 'Location', 'Billing Information', | |
'Directory Server', 'Mileage', 'Occupation', 'Hobby', 'Sensitivity', | |
'Priority', 'Subject', 'Notes', 'Language', 'Photo', 'Group Membership', | |
'E-mail 1 - Type', 'E-mail 1 - Value', 'E-mail 2 - Type', | |
'E-mail 2 - Value', 'E-mail 3 - Type', 'E-mail 3 - Value', | |
'Phone 1 - Type', 'Phone 1 - Value', 'Address 1 - Type', | |
'Address 1 - Formatted', 'Address 1 - Street', 'Address 1 - City', | |
'Address 1 - PO Box', 'Address 1 - Region', 'Address 1 - Postal Code', | |
'Address 1 - Country', 'Address 1 - Extended Address', | |
'Organization 1 - Type', 'Organization 1 - Name', | |
'Organization 1 - Yomi Name', 'Organization 1 - Title', | |
'Organization 1 - Department', 'Organization 1 - Symbol', | |
'Organization 1 - Location', 'Organization 1 - Job Description', | |
'Website 1 - Type', 'Website 1 - Value'] | |
''' | |
c = clean_google_contacts(g) | |
f = contacts_join(r, c) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment