Last active
August 6, 2018 10:27
-
-
Save srikanthlogic/971974 to your computer and use it in GitHub Desktop.
Script to screenscrape
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to screen-scrape the 2011 India Assembly Election results
# Author : SrikanthLogic
# Date : 13 May 2011
# Screen-scraped from http://eciresults.ap.nic.in/ConstituencywiseS031.htm
# Output :- http://bit.ly/jaFNTh
from BeautifulSoup import BeautifulSoup | |
import re | |
import urllib2 | |
import codecs | |
def getResults(state,url):
    """Scrape one constituency result page and return its rows as CSV text.

    The page at ``url`` is downloaded and parsed; inside the element with
    id ``div1`` the table with ``border='1'`` is read for the constituency
    name, the counting status and one row per candidate.

    Args:
        state: Label written as the first column of every output line.
        url: Address of the constituency result page to download.

    Returns:
        One CRLF-terminated line per candidate row, concatenated, with the
        comma-separated columns: state, constituency, update time, status,
        candidate name, party, votes, vote share (percent of the votes
        summed over all rows on the page) and 1-based row position in page
        order. An empty string when no candidate rows are found.

    No errors are caught here: a failed download or a page whose layout
    does not match the lookups below raises from the underlying call.
    """
    soup = BeautifulSoup(urllib2.urlopen(url).read())
    results_div = soup.find(id='div1')
    table1 = results_div.find(border='1')
    update_time = results_div.findAll('table')[1].findAll(
        'td', attrs={"colspan": "3"})[0].contents[0]

    # The first two colspan=3 cells of the results table carry the
    # constituency name and the counting status, in that order.
    header_cells = table1.findAll('td', attrs={"colspan": "3"})
    constituency = header_cells[0].contents[0]
    status = header_cells[1].contents[0]

    # Read (name, party, votes) from every candidate row exactly once.
    rows = []
    for candidate in table1.findAll('tr', attrs={"style": "font-size:12px;"}):
        cells = candidate.findAll('td')
        rows.append(
            (cells[0].contents[0], cells[1].contents[0], cells[2].contents[0]))

    total_votes = sum(int(votes) for _, _, votes in rows)

    # NOTE(review): fields are joined with bare commas and no quoting, so a
    # comma inside any scraped value shifts the columns of that line.
    # NOTE(review): str() on scraped text may misbehave for non-ASCII
    # content under Python 2 -- confirm the pages are plain ASCII.
    prefix = ",".join(
        [str(state), str(constituency), str(update_time), str(status)])

    lines = []
    for position, (name, party, votes) in enumerate(rows, 1):
        # Before counting starts every row may show 0 votes; report a 0.0
        # share instead of dividing by zero.
        if total_votes:
            vote_share = float(votes) / total_votes * 100
        else:
            vote_share = 0.0
        lines.append(",".join([
            prefix,
            str(name),
            str(party),
            str(votes),
            str(vote_share),
            str(position),
        ]) + '\r\n')

    output_text = "".join(lines)
    # Single-argument call form prints identically under Python 2 and 3.
    print(output_text)
    return output_text
def main():
    """Scrape every constituency of every listed state into one CSV file.

    For each state the constituency pages are numbered from 1 up to the
    seat count stored in ``states``; the text returned by ``getResults``
    for each page is appended to ``electionResults.csv`` (UTF-8), followed
    by an extra CRLF.
    """
    # state name -> [code used in the result page URL, number of constituencies]
    states = {"Assam":["S03",126],"Kerala":["S11",140],"Puduchery":["U07",30],"Tamilnadu":["S22",234],"West Bengal":["S25",294]}
    # Explicit list so the states are processed in a fixed order.
    stateList = ["Assam","Kerala","Puduchery","Tamilnadu","West Bengal"]

    # The context manager closes (and flushes) the file even when a fetch
    # raises part-way through, so rows already scraped are not lost.
    with codecs.open('electionResults.csv','wb','utf-8') as fl:
        for state in stateList:
            state_code, seat_count = states[state]
            for number in range(1, seat_count + 1):
                url = 'http://eciresults.ap.nic.in/Constituencywise' + state_code + str(number) +'.htm'
                output = getResults(state,url)
                fl.write(output + u'\r\n')
            # Single-argument call form prints identically under Python 2 and 3.
            print('Completed State' + str(state))
# Start the scrape only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment