Skip to content

Instantly share code, notes, and snippets.

@frankchen07
Last active July 18, 2020 16:57
Show Gist options
  • Save frankchen07/b9d9eb78ff923c15986c2c60707c4ee7 to your computer and use it in GitHub Desktop.
Save frankchen07/b9d9eb78ff923c15986c2c60707c4ee7 to your computer and use it in GitHub Desktop.
contact_script.py
import pandas as pd
pd.set_option('display.max_rows', None)
'''
cleans google contacts data
input:
contacts csv
output:
cleaned contacts csv
[first_name, last_name, phone, email]
'''
def clean_google_contacts(gdata):
# select & rename
gdata = gdata[['Given Name', 'Family Name', 'E-mail 1 - Value', 'Phone 1 - Value']]
gdata = gdata.rename(
columns={
'Given Name': 'first_name',
'Family Name': 'last_name',
'E-mail 1 - Value': 'email',
'Phone 1 - Value': 'phone'
}
)
return gdata
'''
joins current relationships sheet with google contacts
input:
relationships csv
[first_name, last_name, phone, email]
cleaned contacts csv
[first_name, last_name, phone, email]
output:
final csv coalesced and de-duplicated
[first_name, last_name, phone, email]
'''
def contacts_join(r, c):
# remove companies & establishments
r = r[(r['first_name'].notnull()) & (r['last_name'].notnull())]
c = c[(c['first_name'].notnull()) & (c['last_name'].notnull())]
# combine
f = r.merge(c, on=['first_name', 'last_name'], how='outer')
# coalesce information
f['email'] = f['email_y'].combine_first(f['email_x'])
f['phone'] = f['phone_y'].combine_first(f['phone_x'])
# select rows
f = f[['first_name', 'last_name', 'email', 'phone']]
# de-duplicate, but should do this in Google
f = f.drop_duplicates()
# save
f.to_csv('connection_fox_upload.csv', index=False)
print('saved final csv')
return f.head()
if __name__ == '__main__':
r = pd.read_csv('relationships.csv')
g = pd.read_csv('contacts.csv')
print(g.columns) # file structure of google contacts export
'''
['Name', 'Given Name', 'Additional Name', 'Family Name', 'Yomi Name',
'Given Name Yomi', 'Additional Name Yomi', 'Family Name Yomi',
'Name Prefix', 'Name Suffix', 'Initials', 'Nickname', 'Short Name',
'Maiden Name', 'Birthday', 'Gender', 'Location', 'Billing Information',
'Directory Server', 'Mileage', 'Occupation', 'Hobby', 'Sensitivity',
'Priority', 'Subject', 'Notes', 'Language', 'Photo', 'Group Membership',
'E-mail 1 - Type', 'E-mail 1 - Value', 'E-mail 2 - Type',
'E-mail 2 - Value', 'E-mail 3 - Type', 'E-mail 3 - Value',
'Phone 1 - Type', 'Phone 1 - Value', 'Address 1 - Type',
'Address 1 - Formatted', 'Address 1 - Street', 'Address 1 - City',
'Address 1 - PO Box', 'Address 1 - Region', 'Address 1 - Postal Code',
'Address 1 - Country', 'Address 1 - Extended Address',
'Organization 1 - Type', 'Organization 1 - Name',
'Organization 1 - Yomi Name', 'Organization 1 - Title',
'Organization 1 - Department', 'Organization 1 - Symbol',
'Organization 1 - Location', 'Organization 1 - Job Description',
'Website 1 - Type', 'Website 1 - Value']
'''
c = clean_google_contacts(g)
f = contacts_join(r, c)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment