Skip to content

Instantly share code, notes, and snippets.

@krisrak
Last active April 26, 2017 01:45
Show Gist options
  • Save krisrak/9352c01705f79f273b446caaf98cfc9a to your computer and use it in GitHub Desktop.
Save krisrak/9352c01705f79f273b446caaf98cfc9a to your computer and use it in GitHub Desktop.
Python script to find locations in bio and update followers csv exported from picodash.com
#!/usr/bin/python
import sys
import csv
import re
# Read the CSV path from the first command-line argument.
try:
    filename = sys.argv[1]
except IndexError:
    # Only a missing argument is handled here; anything else should surface.
    print("\nERROR: Please specify filename\n")
    print("Usage:")
    print(" $ picodash_export_biolocation.py data.csv\n")
    print("- First param should be the csv file path\n")
    # Exit non-zero so shells and calling scripts can tell the run failed.
    sys.exit(1)
# Lower-case location names to look for in each bio; update as needed.
# Every name must fit in a single string literal on one line. A few names
# appear more than once (within `cities` or across the three lists).
cities = [
    "karachi", "shanghai", "mumbai", "beijing", "delhi", "buenos aires",
    "manila", "seoul", "sao paulo", "moscow", "jakarta", "istanbul",
    "bangkok", "mexico city", "tokyo", "tehran", "new york", "kinshasa",
    "dhaka", "lagos", "cairo", "lima", "london", "tianjin", "bogota",
    "ho chi minh", "hong kong", "guangzhou", "dongguan", "lahore",
    "rio de janeiro", "baghdad", "bangalore", "surat", "santiago",
    "kolkata", "shenyang", "chongqing", "singapore", "riyadh", "luanda",
    "harbin", "st petersburg", "chennai", "ahmadabad", "wuhan", "yangon",
    "sydney", "chengdu", "shenzhen", "nanjing", "alexandria",
    "johannesburg", "jeddah", "los angeles", "abidjan", "ankara",
    "casablanca", "yokohama", "hyderabad", "melbourne", "kano", "busan",
    "kabul", "changchun", "cape town", "guiyang", "berlin", "hangzhou",
    "addis ababa", "pune", "pyongyang", "madrid", "nairobi", "kanpur",
    "kunming", "surabaya", "jaipur", "salvador da bahia", "santo domingo",
    "mashhad", "chicago", "zibo", "kiev", "durg", "huludao", "caracas",
    "qingdao", "changsha", "rome", "fuzhou", "quezon city", "xian",
    "port harcourt", "osaka", "incheon", "shijiazhuang", "taipei",
    "zhengzhou", "chittagong", "toronto", "ibadan", "taiyuan", "cali",
    "daegu", "bandung", "faisalabad", "fortaleza", "baoshan",
    "dar es salaam", "zhongshan", "xiamen", "chaoyang", "belo horizonte",
    "nanning", "havana", "nagpur", "omdurman", "suzhou", "aleppo",
    "lucknow", "izmir", "linyi", "dalian", "nagoya", "houston", "giza",
    "medellin", "khartoum", "ningbo", "guayaquil", "tashkent", "paris",
    "brasilia", "lanzhou", "changzhou", "monterrey", "baku", "kowloon",
    "bucharest", "medan", "tangshan", "jilin", "nanchong", "conakry",
    "amman", "indore", "sapporo", "jinan", "puebla", "haiphong",
    "macheng", "curitiba", "nanchang", "minsk", "xuzhou", "kuala lumpur",
    "bamako", "huzhou", "patna", "suzhou anhui",
    "urumqi", "yantai", "maracaibo", "tianmen", "hamburg", "shantou",
    "basrah", "hefei", "tengzhou", "wuxi", "sanaa", "manaus", "fuyang",
    "budapest", "warsaw", "mecca", "ecatepec", "tripoli", "vienna",
    "suizhou", "barcelona", "damascus", "agra", "gaozhou", "taian",
    "quito", "tianshui", "montreal", "nashik", "shangqiu", "accra",
    "perth", "harare", "palembang", "santa cruz", "esfahan",
    "guadalajara", "phoenix", "recife", "neijiang", "rawalpindi",
    "tangerang", "brisbane", "philadelphia", "kobe", "hechuan",
    "kaohsiung", "pretoria", "taizhou", "algiers", "la paz",
    "pimpri chinchwad", "barquisimeto", "guigang", "kumasi", "luoyang",
    "mogadishu", "vadodara", "quanzhou", "tbilisi", "tijuana", "xintai",
    "nanan", "ouagadougou", "benghazi", "kyoto", "kharkiv", "kaduna",
    "xinyang", "bhopal", "rugao", "anyang", "fukuoka", "weifang",
    "zhanjiang", "hyderabad", "fushun", "daejeon", "qiqihaer", "jianyang",
    "yaounde", "multan", "almaty", "guiping", "huazhou", "kampala",
    "tabriz", "gujranwala", "gwangju", "belem", "sofia", "bursa",
    "changde", "ciudad juarez", "ludhiana", "tongzhou", "novosibirsk",
    "semarang", "handan", "kawasaki", "suining", "douala", "karaj",
    "liuyang", "luzhou", "caloocan", "thane", "varanasi", "hanoi",
    "munich", "davao city", "porto alegre", "taixing", "bozhou",
    "san antonio", "jinjiang", "lufeng", "yongcheng", "guilin", "pingdu",
    "ahwaz", "rajkot", "santiago de los caballeros", "montevideo",
    "ekaterinburg", "baotou", "cordoba", "milan", "lianjiang", "medina",
    "mianyang", "yiyang", "barranquilla", "irbil", "adelaide", "anshan",
    "ranchi", "rizhao", "guarulhos", "goiania", "dallas", "heze",
    "nizhniy novgorod", "san diego", "meerut", "la matanza", "datong",
    "beirut", "fengcheng", "ruian", "laiwu", "maputo", "phnom penh",
    "prague", "pingdingshan", "yuzhou", "shiraz", "cixi", "allahabad",
    "huainan", "anqiu", "fuqing", "maiduguri", "pikine-guediawaye",
    "tegucigalpa", "tunis", "valencia", "amritsar", "qianjiang",
    "bazhong", "managua", "leqing", "saitama", "belgrade", "hiroshima",
    "dongtai", "ujung pandang",
    "aurangabad", "adana", "guangyuan", "qidong", "rosario", "samara",
    "zapopan", "nezahualcoyotl", "lubumbashi", "mosul", "leon", "omsk",
    "solapur", "brazzaville", "bijie", "haicheng", "madurai", "ulsan",
    "leshan", "jabalpur", "kazan", "makasar", "jimo", "jining", "lusaka",
    "ulaanbaatar", "yerevan", "donetsk", "peshawar", "wafangdian",
    "copenhagen", "mirat", "chelyabinsk", "guatemala city", "shouguang",
    "dubai", "suwon", "port-au-prince", "odessa", "goyang", "seongnam",
    "marrakech", "taishan", "rostov on don", "dhanbad", "taichung",
    "ezhou", "campinas", "faridabad", "jiangdu", "benin", "beiliu",
    "gongzhuling", "changshu", "qom", "dnipropetrovsk", "freetown",
    "sendai", "dakar", "ufa", "fuzhou", "yichun", "birmingham",
    "mudanjiang", "volgograd", "perm", "fes", "haora", "jodhpur",
    "san jose", "torreon", "mexicali", "subra al-haymah", "baoding",
    "cologne", "hezhou", "calgary", "sao luis", "kitakyushu",
    "sao goncalo", "ghaziabad", "visakhapatnam", "wujiang", "naples",
    "chiba", "mandalay", "feicheng", "brussels", "vijayawada", "haimen",
    "kathmandu", "ciudad guayana", "safi", "coimbatore", "gaziantep",
    "weinan", "soweto", "maceio", "agadir", "krasnoyarsk", "srinagar",
    "detroit", "turin", "arequipa", "mbuji-mayi", "songzi", "sale",
    "antananarivo", "laizhou", "chandigarh", "zaria", "abu dhabi",
    "cartagena", "teresina", "danyang", "sholapur", "hengyang", "honghu",
    "mombasa", "daye", "bogor", "lilongwe", "duque de caxias", "khulna",
    "marseille", "bucheon", "norfolk", "panama city", "voronezh",
    "nova iguacu", "sakai", "port elizabeth", "saratov", "benxi",
    "haikou", "stockholm", "hims", "kermanshah", "naucalpan", "hamhung",
    "hohhot", "padang", "hamamatsu", "ottawa", "ad-dammam", "dammam",
    "niigata", "liuzhou", "thiruvananthapuram", "valencia", "bengbu",
    "san francisco", "guwahati", "jacksonville", "santiago de queretaro",
    "zagreb", "astana", "kolwezi", "hubli", "daqing", "nanyang", "mysore",
    "indianapolis", "natal", "chisinau", "toluca", "lodz",
    "sao bernardo do campo", "zaporizhzhya", "abuja", "leeds",
    "masqat-matrah", "tainan", "bishkek", "jerusalem", "amsterdam",
    "jixi", "tiruchchirappalli", "athens", "krakow", "cebu city",
    "ankang", "bandar lampung",
    "malang", "lviv", "merida", "edmonton", "ogbomosho", "riga",
    "acapulco", "shizuoka", "columbus", "jalandhar", "xining",
    "krasnodar", "la plata", "tangier", "sevilla", "joao pessoa",
    "okayama", "chihuahua", "fuxin", "gwalior", "jinzhou",
    "san luis potosi", "tlalnepantla de baz", "palermo", "oran", "austin",
    "aligarh", "dushanbe", "zhangjiakou", "amravati", "bulawayo",
    "durban", "memphis", "baltimore", "frankfurt", "aguascalientes",
    "bhubaneswar", "bangui", "colombo", "kingston", "jamshedpur", "rabat",
    "nashville", "boston", "sao jose dos campos", "cochabamba",
    "milwaukee", "stuttgart", "washington dc", "rotterdam", "dortmund",
    "kaifeng", "oslo", "denver", "helsinki", "glasgow", "essen",
    "vancouver", "seattle", "culiacan", "bhilai", "hengyang", "charlotte",
    "las vegas", "lisbon", "oklahoma city", "kitwe", "bremen",
    "ribeirao preto", "asuncion", "vilnius", "monrovia", "bucaramanga",
    "portland", "tucson", "atlanta", "sheffield", "surakarta",
]
states = [
    "alabama", "alaska", "arizona", "arkansas", "california", "colorado",
    "connecticut", "delaware", "florida", "georgia", "hawaii", "idaho",
    "illinois", "indiana", "iowa", "kansas", "kentucky", "louisiana",
    "maine", "maryland", "massachusetts", "michigan", "minnesota",
    "mississippi", "missouri", "montana", "nebraska", "nevada",
    "new hampshire", "new jersey", "new mexico", "new york",
    "north carolina", "north dakota", "ohio", "oklahoma", "oregon",
    "pennsylvania", "rhode island", "south carolina", "south dakota",
    "tennessee", "texas", "utah", "vermont", "virginia", "washington",
    "west virginia", "wisconsin", "wyoming",
]
countries = [
    "afghanistan", "albania", "algeria", "andorra", "angola", "barbuda",
    "argentina", "armenia", "aruba", "australia", "austria", "azerbaijan",
    "bahamas", "bahrain", "bangladesh", "barbados", "belarus", "belgium",
    "belize", "benin", "bhutan", "bolivia", "bosnia and herzegovina",
    "botswana", "brazil", "brunei", "bulgaria", "burkina faso", "burma",
    "burundi", "cambodia", "cameroon", "canada", "cabo verde",
    "central african republic", "chad", "chile", "china", "colombia",
    "comoros", "congo", "costa rica", "cote d'ivoire", "croatia", "cuba",
    "curacao", "cyprus", "czechia", "denmark", "djibouti", "dominica",
    "dominican republic", "east timor", "ecuador", "egypt", "el salvador",
    "equatorial guinea", "eritrea", "estonia", "ethiopia", "fiji",
    "finland", "france", "gabon", "gambia", "georgia", "germany", "ghana",
    "greece", "grenada", "guatemala", "guinea", "guyana", "haiti",
    "holy see", "honduras", "hong kong", "hungary", "iceland", "india",
    "indonesia", "iran", "iraq", "ireland", "israel", "italy", "jamaica",
    "japan", "jordan", "kazakhstan", "kenya", "kiribati", "north korea",
    "south korea", "kosovo", "kuwait", "kyrgyzstan", "laos", "latvia",
    "lebanon", "lesotho", "liberia", "libya", "liechtenstein",
    "lithuania", "luxembourg", "macau", "macedonia", "madagascar",
    "malawi", "malaysia", "maldives", "mali", "malta", "marshall islands",
    "mauritania", "mauritius", "mexico", "micronesia", "moldova",
    "monaco", "mongolia", "montenegro", "morocco", "mozambique",
    "namibia", "nauru", "nepal", "netherlands", "new zealand",
    "nicaragua", "niger", "nigeria", "norway", "oman", "pakistan",
    "palau", "palestine", "panama", "papua new guinea", "paraguay",
    "peru", "philippines", "poland", "portugal", "qatar", "romania",
    "russia", "rwanda", "saint kitts and nevis", "saint lucia",
    "saint vincent and the grenadines", "samoa", "san marino",
    "sao tome and principe", "saudi arabia", "senegal", "serbia",
    "seychelles", "sierra leone", "singapore", "sint maarten", "slovakia",
    "slovenia", "solomon islands", "somalia", "south africa",
    "south sudan", "spain", "sri lanka",
    "sudan", "suriname", "swaziland", "sweden", "switzerland", "syria",
    "taiwan", "tajikistan", "tanzania", "thailand", "timor-leste", "togo",
    "tonga", "trinidad and tobago", "tunisia", "turkey", "turkmenistan",
    "tuvalu", "uganda", "ukraine", "united states of america", "usa",
    "united arab emirates", "uae", "united kingdom", "uk", "uruguay",
    "uzbekistan", "vanuatu", "venezuela", "vietnam", "yemen", "zambia",
    "zimbabwe",
]
# Single combined list that each bio is scanned against.
locations = cities + states + countries
def _find_bio_locations(bio, candidates):
    """Return the sorted candidate names that occur in *bio* as whole words.

    Matching is case-insensitive. A candidate counts only when at least one
    of its occurrences is not directly preceded or followed by a letter or
    digit, so "paris" matches "Paris, FR" but not "comparison".
    """
    lowered = bio.lower()
    found = set()
    for name in candidates:
        # Cheap substring pre-filter before paying for a regex search.
        if not name or name not in lowered:
            continue
        # [^\W_] means "letter or digit". The negative lookarounds also
        # succeed at the very start and end of the text, and re.search
        # considers every occurrence rather than only the first one.
        pattern = r"(?<![^\W_])" + re.escape(name) + r"(?![^\W_])"
        if re.search(pattern, lowered):
            found.add(name)
    # Sorted so the generated column is deterministic between runs.
    return sorted(found)


def add_bio_location_column(csv_path, candidates):
    """Rewrite the CSV at *csv_path* with an extra "bio_location" column.

    The first row is treated as the header and must contain a "bio" column.
    For every other row, the candidates found in its bio are appended as a
    comma-joined string (empty when nothing is found). The whole file is
    read before it is overwritten in place.

    Raises:
        ValueError: if the header row has no "bio" column; the file is
            left untouched in that case.
    """
    updated_rows = []
    bio_col = None
    header_width = 0
    with open(csv_path, 'r') as csvfile:
        for row_index, row in enumerate(csv.reader(csvfile)):
            if row_index == 0:
                # Find the index of the bio column (last one wins if repeated).
                for col_index, col in enumerate(row):
                    if col == "bio":
                        bio_col = col_index
                if bio_col is None:
                    raise ValueError('no "bio" column found in ' + csv_path)
                header_width = len(row)
                row.append("bio_location")
                updated_rows.append(row)
                continue
            # Pad short or blank rows so the bio lookup cannot fail and the
            # new value lands under the "bio_location" header.
            if len(row) < header_width:
                row.extend([""] * (header_width - len(row)))
            found = _find_bio_locations(row[bio_col], candidates)
            if found:
                print("Found Location: " + ",".join(found))
            row.append(",".join(found))
            updated_rows.append(row)
    with open(csv_path, 'w') as csvfile:
        csv_writer = csv.writer(csvfile, lineterminator='\n')
        csv_writer.writerows(updated_rows)


if __name__ == "__main__":
    try:
        add_bio_location_column(filename, locations)
    except ValueError as err:
        # Passing a string to sys.exit prints it to stderr and exits with 1.
        sys.exit("ERROR: %s" % err)
    print("Updated CSV with location column: " + filename)
@krisrak
Copy link
Author

krisrak commented Apr 23, 2017

This is a Python script that adds a location column (derived from the bio description) to the CSV of followers exported from picodash.com

You have to specify the CSV filename. The script adds a new column "bio_location" containing the locations found in the bio description.

Usage:
/usr/bin/python picodash_export_biolocation.py <csv_filename>

Example:
/usr/bin/python picodash_export_biolocation.py data.csv

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment