Last active
April 26, 2017 01:45
-
-
Save krisrak/9352c01705f79f273b446caaf98cfc9a to your computer and use it in GitHub Desktop.
Python script to find locations in bio and update followers csv exported from picodash.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import sys | |
import csv | |
import re | |
# Read the CSV path from the first command-line argument.
# Only a missing argument (IndexError) is treated as a usage error, so that
# unrelated exceptions such as KeyboardInterrupt are no longer swallowed.
try:
    filename = sys.argv[1]
except IndexError:
    print("\nERROR: Please specify filename\n")
    print("Usage:")
    print(" $ picodash_export_biolocation.py data.csv\n")
    print("- First param should be the csv file path\n")
    # Exit with a non-zero status so shells and callers can detect the failure.
    sys.exit(1)
# Lists of lowercase location names to look for in a bio; update as needed.
# Multi-word names are written with single ASCII spaces. Four entries
# ("suzhou anhui", "ujung pandang", "bandar lampung", "sri lanka") used to be
# split across two physical lines inside their string literal and have been
# rejoined here. Duplicate entries are harmless to substring lookups and are
# kept as they were.
cities = [
    "karachi","shanghai","mumbai","beijing","delhi","buenos aires","manila","seoul","sao paulo","moscow",
    "jakarta","istanbul","bangkok","mexico city","tokyo","tehran","new york","kinshasa","dhaka","lagos",
    "cairo","lima","london","tianjin","bogota","ho chi minh","hong kong","guangzhou","dongguan","lahore",
    "rio de janeiro","baghdad","bangalore","surat","santiago","kolkata","shenyang","chongqing","singapore","riyadh",
    "luanda","harbin","st petersburg","chennai","ahmadabad","wuhan","yangon","sydney","chengdu","shenzhen",
    "nanjing","alexandria","johannesburg","jeddah","los angeles","abidjan","ankara","casablanca","yokohama","hyderabad",
    "melbourne","kano","busan","kabul","changchun","cape town","guiyang","berlin","hangzhou","addis ababa",
    "pune","pyongyang","madrid","nairobi","kanpur","kunming","surabaya","jaipur","salvador da bahia","santo domingo",
    "mashhad","chicago","zibo","kiev","durg","huludao","caracas","qingdao","changsha","rome",
    "fuzhou","quezon city","xian","port harcourt","osaka","incheon","shijiazhuang","taipei","zhengzhou","chittagong",
    "toronto","ibadan","taiyuan","cali","daegu","bandung","faisalabad","fortaleza","baoshan","dar es salaam",
    "zhongshan","xiamen","chaoyang","belo horizonte","nanning","havana","nagpur","omdurman","suzhou","aleppo",
    "lucknow","izmir","linyi","dalian","nagoya","houston","giza","medellin","khartoum","ningbo",
    "guayaquil","tashkent","paris","brasilia","lanzhou","changzhou","monterrey","baku","kowloon","bucharest",
    "medan","tangshan","jilin","nanchong","conakry","amman","indore","sapporo","jinan","puebla",
    "haiphong","macheng","curitiba","nanchang","minsk","xuzhou","kuala lumpur","bamako","huzhou","patna",
    "suzhou anhui","urumqi","yantai","maracaibo","tianmen","hamburg","shantou","basrah","hefei","tengzhou",
    "wuxi","sanaa","manaus","fuyang","budapest","warsaw","mecca","ecatepec","tripoli","vienna",
    "suizhou","barcelona","damascus","agra","gaozhou","taian","quito","tianshui","montreal","nashik",
    "shangqiu","accra","perth","harare","palembang","santa cruz","esfahan","guadalajara","phoenix","recife",
    "neijiang","rawalpindi","tangerang","brisbane","philadelphia","kobe","hechuan","kaohsiung","pretoria","taizhou",
    "algiers","la paz","pimpri chinchwad","barquisimeto","guigang","kumasi","luoyang","mogadishu","vadodara","quanzhou",
    "tbilisi","tijuana","xintai","nanan","ouagadougou","benghazi","kyoto","kharkiv","kaduna","xinyang",
    "bhopal","rugao","anyang","fukuoka","weifang","zhanjiang","hyderabad","fushun","daejeon","qiqihaer",
    "jianyang","yaounde","multan","almaty","guiping","huazhou","kampala","tabriz","gujranwala","gwangju",
    "belem","sofia","bursa","changde","ciudad juarez","ludhiana","tongzhou","novosibirsk","semarang","handan",
    "kawasaki","suining","douala","karaj","liuyang","luzhou","caloocan","thane","varanasi","hanoi",
    "munich","davao city","porto alegre","taixing","bozhou","san antonio","jinjiang","lufeng","yongcheng","guilin",
    "pingdu","ahwaz","rajkot","santiago de los caballeros","montevideo","ekaterinburg","baotou","cordoba","milan","lianjiang",
    "medina","mianyang","yiyang","barranquilla","irbil","adelaide","anshan","ranchi","rizhao","guarulhos",
    "goiania","dallas","heze","nizhniy novgorod","san diego","meerut","la matanza","datong","beirut","fengcheng",
    "ruian","laiwu","maputo","phnom penh","prague","pingdingshan","yuzhou","shiraz","cixi","allahabad",
    "huainan","anqiu","fuqing","maiduguri","pikine-guediawaye","tegucigalpa","tunis","valencia","amritsar","qianjiang",
    "bazhong","managua","leqing","saitama","belgrade","hiroshima","dongtai","ujung pandang","aurangabad","adana",
    "guangyuan","qidong","rosario","samara","zapopan","nezahualcoyotl","lubumbashi","mosul","leon","omsk",
    "solapur","brazzaville","bijie","haicheng","madurai","ulsan","leshan","jabalpur","kazan","makasar",
    "jimo","jining","lusaka","ulaanbaatar","yerevan","donetsk","peshawar","wafangdian","copenhagen","mirat",
    "chelyabinsk","guatemala city","shouguang","dubai","suwon","port-au-prince","odessa","goyang","seongnam","marrakech",
    "taishan","rostov on don","dhanbad","taichung","ezhou","campinas","faridabad","jiangdu","benin","beiliu",
    "gongzhuling","changshu","qom","dnipropetrovsk","freetown","sendai","dakar","ufa","fuzhou","yichun",
    "birmingham","mudanjiang","volgograd","perm","fes","haora","jodhpur","san jose","torreon","mexicali",
    "subra al-haymah","baoding","cologne","hezhou","calgary","sao luis","kitakyushu","sao goncalo","ghaziabad","visakhapatnam",
    "wujiang","naples","chiba","mandalay","feicheng","brussels","vijayawada","haimen","kathmandu","ciudad guayana",
    "safi","coimbatore","gaziantep","weinan","soweto","maceio","agadir","krasnoyarsk","srinagar","detroit",
    "turin","arequipa","mbuji-mayi","songzi","sale","antananarivo","laizhou","chandigarh","zaria","abu dhabi",
    "cartagena","teresina","danyang","sholapur","hengyang","honghu","mombasa","daye","bogor","lilongwe",
    "duque de caxias","khulna","marseille","bucheon","norfolk","panama city","voronezh","nova iguacu","sakai","port elizabeth",
    "saratov","benxi","haikou","stockholm","hims","kermanshah","naucalpan","hamhung","hohhot","padang",
    "hamamatsu","ottawa","ad-dammam","dammam","niigata","liuzhou","thiruvananthapuram","valencia","bengbu","san francisco",
    "guwahati","jacksonville","santiago de queretaro","zagreb","astana","kolwezi","hubli","daqing","nanyang","mysore",
    "indianapolis","natal","chisinau","toluca","lodz","sao bernardo do campo","zaporizhzhya","abuja","leeds","masqat-matrah",
    "tainan","bishkek","jerusalem","amsterdam","jixi","tiruchchirappalli","athens","krakow","cebu city","ankang",
    "bandar lampung","malang","lviv","merida","edmonton","ogbomosho","riga","acapulco","shizuoka","columbus",
    "jalandhar","xining","krasnodar","la plata","tangier","sevilla","joao pessoa","okayama","chihuahua","fuxin",
    "gwalior","jinzhou","san luis potosi","tlalnepantla de baz","palermo","oran","austin","aligarh","dushanbe","zhangjiakou",
    "amravati","bulawayo","durban","memphis","baltimore","frankfurt","aguascalientes","bhubaneswar","bangui","colombo",
    "kingston","jamshedpur","rabat","nashville","boston","sao jose dos campos","cochabamba","milwaukee","stuttgart","washington dc",
    "rotterdam","dortmund","kaifeng","oslo","denver","helsinki","glasgow","essen","vancouver","seattle",
    "culiacan","bhilai","hengyang","charlotte","las vegas","lisbon","oklahoma city","kitwe","bremen","ribeirao preto",
    "asuncion","vilnius","monrovia","bucaramanga","portland","tucson","atlanta","sheffield","surakarta",
]
states = [
    "alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","florida","georgia",
    "hawaii","idaho","illinois","indiana","iowa","kansas","kentucky","louisiana","maine","maryland",
    "massachusetts","michigan","minnesota","mississippi","missouri","montana","nebraska","nevada","new hampshire","new jersey",
    "new mexico","new york","north carolina","north dakota","ohio","oklahoma","oregon","pennsylvania","rhode island","south carolina",
    "south dakota","tennessee","texas","utah","vermont","virginia","washington","west virginia","wisconsin","wyoming",
]
countries = [
    "afghanistan","albania","algeria","andorra","angola","barbuda","argentina","armenia","aruba","australia",
    "austria","azerbaijan","bahamas","bahrain","bangladesh","barbados","belarus","belgium","belize","benin",
    "bhutan","bolivia","bosnia and herzegovina","botswana","brazil","brunei","bulgaria","burkina faso","burma","burundi",
    "cambodia","cameroon","canada","cabo verde","central african republic","chad","chile","china","colombia","comoros",
    "congo","costa rica","cote d'ivoire","croatia","cuba","curacao","cyprus","czechia","denmark","djibouti",
    "dominica","dominican republic","east timor","ecuador","egypt","el salvador","equatorial guinea","eritrea","estonia","ethiopia",
    "fiji","finland","france","gabon","gambia","georgia","germany","ghana","greece","grenada",
    "guatemala","guinea","guyana","haiti","holy see","honduras","hong kong","hungary","iceland","india",
    "indonesia","iran","iraq","ireland","israel","italy","jamaica","japan","jordan","kazakhstan",
    "kenya","kiribati","north korea","south korea","kosovo","kuwait","kyrgyzstan","laos","latvia","lebanon",
    "lesotho","liberia","libya","liechtenstein","lithuania","luxembourg","macau","macedonia","madagascar","malawi",
    "malaysia","maldives","mali","malta","marshall islands","mauritania","mauritius","mexico","micronesia","moldova",
    "monaco","mongolia","montenegro","morocco","mozambique","namibia","nauru","nepal","netherlands","new zealand",
    "nicaragua","niger","nigeria","norway","oman","pakistan","palau","palestine","panama","papua new guinea",
    "paraguay","peru","philippines","poland","portugal","qatar","romania","russia","rwanda","saint kitts and nevis",
    "saint lucia","saint vincent and the grenadines","samoa","san marino","sao tome and principe","saudi arabia","senegal","serbia","seychelles","sierra leone",
    "singapore","sint maarten","slovakia","slovenia","solomon islands","somalia","south africa","south sudan","spain","sri lanka",
    "sudan","suriname","swaziland","sweden","switzerland","syria","taiwan","tajikistan","tanzania","thailand",
    "timor-leste","togo","tonga","trinidad and tobago","tunisia","turkey","turkmenistan","tuvalu","uganda","ukraine",
    "united states of america","usa","united arab emirates","uae","united kingdom","uk","uruguay","uzbekistan","vanuatu","venezuela",
    "vietnam","yemen","zambia","zimbabwe",
]
# Every name the script searches for, in cities / states / countries order.
locations = cities + states + countries
def find_bio_locations(bio, locations):
    """Return the entries of ``locations`` that occur in ``bio`` as whole words.

    The comparison is case-insensitive (``bio`` is lowercased; ``locations``
    are expected to be lowercase already). An occurrence only counts when it
    is not directly preceded or followed by a letter or digit, so "oman" is
    found in "Oman" and "Romania / Oman" but not in "woman" or "Romania".
    Every occurrence is considered, including ones at the very start or end
    of the text.

    Args:
        bio: The free-text bio to search.
        locations: Iterable of lowercase location names.

    Returns:
        A list of the matching names, in the order they appear in
        ``locations``.
    """
    bio_lower = bio.lower()
    matches = []
    for location in locations:
        # Cheap substring pre-filter so a regex is only built for candidates.
        if location not in bio_lower:
            continue
        # [^\W_] means "a letter or a digit"; the lookarounds reject a hit
        # that is glued to an alphanumeric character on either side, and
        # re.search moves on to later occurrences when an earlier one fails.
        pattern = r"(?<![^\W_])" + re.escape(location) + r"(?![^\W_])"
        if re.search(pattern, bio_lower):
            matches.append(location)
    return matches


def add_bio_location_column(filename, locations):
    """Rewrite the CSV at ``filename`` with an added "bio_location" column.

    The first row is treated as the header and must contain a column named
    "bio". For every following row, the locations found in that row's bio are
    appended as one comma-joined, alphabetically sorted cell (an empty cell
    when there is no bio or no match). The file is read completely and then
    overwritten in place.

    Args:
        filename: Path of the CSV file to update.
        locations: Iterable of lowercase location names to look for.

    Raises:
        ValueError: If the header row has no "bio" column.
    """
    # De-duplicate and sort once so every row gets a deterministic cell.
    unique_locations = sorted(set(locations))
    new_data = []
    # open csv file to find location
    with open(filename, 'r') as csvfile:
        csv_reader = csv.reader(csvfile)
        header = next(csv_reader, None)
        if header is not None:
            # find the index of bio column (the last one if it is repeated)
            bio_col_num = None
            for col_index, col in enumerate(header):
                if col == "bio":
                    bio_col_num = col_index
            if bio_col_num is None:
                raise ValueError('no "bio" column found in ' + filename)
            column_count = len(header)
            header.append("bio_location")
            new_data.append(header)
            # iterate on all remaining rows in csv
            for row in csv_reader:
                # Pad short or blank rows so the new cell lands in its column.
                if len(row) < column_count:
                    row.extend([""] * (column_count - len(row)))
                # check if bio exists and find locations in it
                bio_locations = find_bio_locations(row[bio_col_num], unique_locations)
                if bio_locations:
                    print("Found Location: " + ",".join(bio_locations))
                row.append(",".join(bio_locations))
                new_data.append(row)
    with open(filename, 'w') as csvfile:
        csv_writer = csv.writer(csvfile, lineterminator='\n')
        csv_writer.writerows(new_data)


# Run only when executed as a script. `filename` and `locations` are the
# module-level names defined earlier in this file.
if __name__ == "__main__":
    add_bio_location_column(filename, locations)
    print("Updated CSV with location column: " + filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is a Python script that adds a location column (derived from the bio description) to a followers CSV exported from picodash.com.
You have to specify the CSV filename. The script adds a new column, "bio_location", containing the locations found in each bio description.
Usage:
/usr/bin/python picodash_export_biolocation.py <csv_filename>
Example:
/usr/bin/python picodash_export_biolocation.py data.csv