# Source: GitHub gist neelindresh/2705dc7788bcf3d7e8806d4076037cd7
# Author: @neelindresh
# Created: February 20, 2019 19:05
#
# (The remaining lines of the gist web page header -- "Skip to content",
# "Instantly share code, notes, and snippets.", "Show Gist options" and
# "Save ... to your computer and use it in GitHub Desktop." -- were page
# navigation captured with the code, not part of the script.)
import pandas as pd
import numpy as np
import tqdm
def _first_row_by_name(data, column):
    """Map each usable name in ``data[column]`` to its first (country, code) pair.

    The returned dict is ordered by first appearance of each name in
    ``data``, and the pair is taken from the first row carrying that name.

    Args:
        data: DataFrame with ``country_name`` and ``country_iso_code``
            columns plus ``column``.
        column: Name of the column whose values are used as search names.

    Returns:
        dict mapping name -> ``(country_name, country_iso_code)``.
    """
    lookup = {}
    triples = zip(data[column], data['country_name'], data['country_iso_code'])
    for name, country, code in triples:
        # Missing cells are not strings (pandas yields float NaN), and NaN
        # cannot be detected with `is` / `==`; a type check is reliable.
        # Blank names are skipped because "" is a substring of every line.
        if not isinstance(name, str) or not name.strip():
            continue
        lookup.setdefault(name, (country, code))
    return lookup


def match_locations(data, lines, progress=None):
    """Resolve free-text location lines to (country, ISO code) pairs.

    Country names are tried first, then city names.  A name matches a line
    when it occurs in it as a case-insensitive substring.  Every matched
    line yields one output row and is then removed from further matching,
    so each line is attributed at most once.

    Args:
        data: DataFrame with ``country_name``, ``country_iso_code`` and
            ``city_name`` columns.
        lines: Iterable of location strings (e.g. ``file.readlines()``).
        progress: Optional callable wrapping an iterable to report progress
            (``tqdm.tqdm`` fits); ``None`` disables reporting.

    Returns:
        List of ``(country_name, country_iso_code)`` tuples, one per matched
        line, in the order the matches were made.
    """
    # Lower-case every line once instead of once per candidate name.
    pending = [line.lower() for line in lines]
    rows = []
    for column in ('country_name', 'city_name'):
        candidates = _first_row_by_name(data, column).items()
        if progress is not None:
            candidates = progress(candidates)
        for name, row in candidates:
            needle = name.lower()
            # Build the list of leftovers rather than deleting from the list
            # being iterated: in-place deletion makes the iterator skip the
            # line that follows each match.
            unmatched = [line for line in pending if needle not in line]
            rows.extend([row] * (len(pending) - len(unmatched)))
            pending = unmatched
    return rows


def build_location_frame(rows):
    """Build the output DataFrame from matched ``(country, code)`` rows.

    Args:
        rows: List of ``(country_name, country_iso_code)`` tuples.

    Returns:
        DataFrame with columns ``Country`` and ``code``.  Rows are kept in
        match order and labelled ``len(rows) - 1`` down to ``0``, which is
        the labelling the previous row-by-row construction produced, so the
        written CSV keeps the same index column.
    """
    frame = pd.DataFrame(rows, columns=['Country', 'code'])
    frame.index = range(len(rows) - 1, -1, -1)
    return frame


def main():
    """Read the reference table and location lines, write the matches to CSV."""
    data = pd.read_csv('city_country_data.csv')
    with open('location.txt', 'r') as handle:
        lines = handle.readlines()
    rows = match_locations(data, lines, progress=tqdm.tqdm)
    build_location_frame(rows).to_csv('tweet_location.csv')


if __name__ == '__main__':
    main()
# (Gist page footer, not part of the script: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.")