Created
June 6, 2012 19:08
-
-
Save aviraldg/2883991 to your computer and use it in GitHub Desktop.
A script that scrapes the results of the current ICSE exams for a particular school/exam centre.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sqlite3 | |
import requests | |
import bs4 | |
import datetime | |
# Results page that main() POSTs each centre/roll-number lookup to.
SOURCE_URI = 'http://server2.examresults.net/icseX12-res.aspx'
def main(args):
    """Scrape one exam centre's results into a local SQLite database.

    ``args`` is an argv-style list whose second element is the numeric centre
    code. Roll numbers are POSTed to ``SOURCE_URI`` one at a time, starting at
    001, until the reply contains 'does not exist'. Every result page is
    stored as one row of the ``results`` table in ``icse<centre>.db`` and the
    roll number just stored is printed as progress.

    Raises:
        SystemExit: if no centre code was supplied.
        ValueError: if the centre code is not an integer.
    """
    if len(args) < 2:
        # Explicit check instead of ``assert``, which is stripped under -O.
        prog = args[0] if args else 'icse'
        raise SystemExit('usage: {0} CENTRE_CODE'.format(prog))

    # Convert first so a bad code fails before a database file is created.
    center = int(args[1])

    conn = sqlite3.connect('icse{0}.db'.format(args[1]))
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS results (school_code int, index_no int primary key, name text, school text, dob date,
            eng int, hin int, eed int, hcg int, mat int, sci int, cta int, eas int);''')

        roll = 1
        while True:
            response = requests.post(
                SOURCE_URI,
                {'center1': center, 'sno1': '{0:03}'.format(roll)},
            ).text
            if 'does not exist' in response:
                return
            roll += 1

            soup = bs4.BeautifulSoup(response)
            # Exactly two tables of class 'sp6' are expected: candidate
            # details first, then the marks.
            metatable, table = soup.find_all('table', 'sp6')

            # Slot 0 is a placeholder that receives the school code below.
            data = [None] + [
                row.find_all('td')[1].contents[0].string.title()
                for row in metatable.find_all('tr')
            ]
            # The first scraped field is slash-separated; everything after
            # its first component becomes (school_code, index_no).
            data[0], data[1] = data[1].split('/')[1:]

            # Skip the first row and the last two rows of the marks table.
            for mark in [row.find_all('td')[1].contents[0].string
                         for row in table.find_all('tr')[1:-2]]:
                # 'XXX' marks a missing score and is stored as NULL.
                data.append(None if mark == 'XXX' else int(mark))

            # The last two columns are cta and eas. If the page mentions EAS
            # the final scraped mark stays in the eas column and cta is NULL;
            # otherwise eas is NULL.
            if 'EAS' in response:
                data.insert(-1, None)
            else:
                data.append(None)

            # The date text is slash-separated in the reverse of date()'s
            # (year, month, day) argument order. Reversing a list keeps this
            # working on Python 3, where map() is not reversible.
            data[4] = datetime.date(
                *[int(part) for part in reversed(data[4].split('/'))]
            )

            # OR REPLACE keeps a re-run from failing on the index_no primary
            # key when rows from an earlier run are already present.
            c.execute('''INSERT OR REPLACE INTO results values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', data)
            print('{0}'.format(roll - 1))
            conn.commit()
    finally:
        # Release the database handle on normal exit and on errors alike.
        conn.close()
# Script entry point: hand the raw argv list to main() and use its return
# value (None on a normal run) as the process exit status.
if __name__ == '__main__':
    import sys
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment