Skip to content

Instantly share code, notes, and snippets.

@caguiclajmg
Last active November 6, 2017 10:42
Show Gist options
  • Save caguiclajmg/ca1a052e6819cdfcd9c60f5085a436e1 to your computer and use it in GitHub Desktop.
Save caguiclajmg/ca1a052e6819cdfcd9c60f5085a436e1 to your computer and use it in GitHub Desktop.
Scrape data from SchoolAutomate portals
#!/usr/bin/env python3
import requests
import getpass
import html.parser
class LoginFormParser(html.parser.HTMLParser):
    """Collect the hidden ``<input>`` fields of the portal login form.

    After ``feed()``, ``data`` maps the name of each recognised hidden
    input to the content of its ``value`` attribute. Inputs that are not
    ``type="hidden"``, that lack a ``name`` or ``value`` attribute, or whose
    name is not listed in ``FIELDS`` are ignored.

    ``handle_endtag`` and ``handle_data`` are inherited from ``HTMLParser``,
    whose default implementations do nothing.
    """

    # Names of the hidden inputs worth keeping.
    FIELDS = frozenset({
        'user_id',
        'password',
        'body_color',
        'welcome_url',
        'page_url',
        'login_type',
    })

    def __init__(self):
        super().__init__()
        self.data = {}

    def handle_starttag(self, tag, attrs):
        """Record the name/value of a recognised hidden ``<input>`` tag.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs for the tag.
        """
        if tag != 'input':
            return

        # Index the attributes by *name*. Testing ``'type' in pair`` against
        # the whole (name, value) tuple would also match an attribute whose
        # value merely happens to be 'type', 'name' or 'value'.
        attributes = {}
        for attr_name, attr_value in attrs:
            # Keep the first occurrence of a repeated attribute.
            attributes.setdefault(attr_name, attr_value)

        if attributes.get('type') != 'hidden':
            return
        if 'name' not in attributes or 'value' not in attributes:
            return

        field_name = attributes['name']
        if field_name in self.FIELDS:
            self.data[field_name] = attributes['value']
class ChecklistParser(html.parser.HTMLParser):
    """Extract the student summary fields from the checklist page.

    The parser alternates between two states:

    * ``parse_tag is None`` - every text node is searched for one of the
      known labels; the first label found selects the field to read next.
    * ``parse_tag`` set - the next text node seen while ``current_tag`` is
      the expected tag (``b`` for ``student_id``, ``strong`` for every other
      field) is stored in ``data`` and the parser goes back to searching.
    """

    # (label text, key in ``data``) pairs, tried in this order.
    _LABELS = (
        ('Student ID', 'student_id'),
        ('Student name', 'student_name'),
        ('Course/Major', 'course'),
        ('Total units required for this course', 'units_required'),
        ('Year', 'year'),
        ('Status', 'status'),
        ('Total units taken', 'units_taken'),
        ('GWA', 'gwa'),
    )

    # Fields whose value is read from a <strong> element.
    _STRONG_FIELDS = frozenset({
        'student_name',
        'course',
        'units_required',
        'year',
        'status',
        'units_taken',
        'gwa',
    })

    def __init__(self):
        super().__init__()
        self.data = {}
        self.current_tag = None  # name of the most recently opened tag
        self.parse_tag = None    # key of the field whose value is awaited

    def handle_starttag(self, tag, attrs):
        """Remember the tag that was opened last."""
        self.current_tag = tag

    def handle_endtag(self, tag):
        """Ignore end tags; ``current_tag`` is deliberately left untouched."""

    def handle_data(self, data):
        """Match a label, or capture the value for the pending field."""
        pending = self.parse_tag

        if pending is None:
            # Searching: the first label contained in this text node wins.
            for label, key in self._LABELS:
                if label in data:
                    self.parse_tag = key
                    break
            return

        # Capturing: work out which element must hold the value.
        if pending == 'student_id':
            value_tag = 'b'
        elif pending in self._STRONG_FIELDS:
            value_tag = 'strong'
        else:
            return

        if self.current_tag == value_tag:
            self.data[pending] = data
            self.parse_tag = None
def portal_login(session, username, password):
    """Log in to the portal and report whether the login succeeded.

    The login frame is fetched first and its hidden form fields are read
    with ``LoginFormParser``. The values of the hidden ``user_id`` and
    ``password`` inputs are used as the *names* of the fields under which
    the actual credentials are posted; the remaining hidden fields are
    echoed back unchanged.

    Args:
        session: Object providing ``get`` and ``post`` in the style of
            ``requests.Session``; it is reused for both requests.
        username: Portal user name.
        password: Portal password.

    Returns:
        True if the response to the login request contains
        ``'welcome_stud.jsp'``. False otherwise, including when the login
        page does not contain every expected hidden form field.
    """
    login_page = session.get('http://gti-binan.dyndns.org:8339/PARENTS_STUDENTS/main_files/parents_student_main_page_rightFrame.jsp')
    form = LoginFormParser()
    form.feed(login_page.text)
    hidden = form.data

    required = ('user_id', 'password', 'body_color', 'welcome_url', 'page_url', 'login_type')
    if any(field not in hidden for field in required):
        # An error page or changed markup: a valid login request cannot be
        # built, so report a failed login instead of raising KeyError.
        return False

    # Key order mirrors the order in which the form fields are assembled;
    # should a dynamic field name collide with a fixed one, the later entry
    # wins.
    payload = {
        hidden['user_id']: username,
        'user_id': hidden['user_id'],
        hidden['password']: password,
        'password': hidden['password'],
        'is_secured': 1,
        'x': 0,
        'y': 0,
        'body_color': hidden['body_color'],
        'welcome_url': hidden['welcome_url'],
        'page_url': hidden['page_url'],
        'login_type': hidden['login_type'],
    }
    response = session.post('http://gti-binan.dyndns.org:8339/commfile/login.jsp', data=payload)
    return 'welcome_stud.jsp' in response.text
def portal_logout(session):
    """Placeholder for logging out; currently does nothing and returns None."""
    return None
def scrape_checklist(session):
    """Fetch the checklist page and return the fields parsed from it.

    Args:
        session: Object providing ``get`` in the style of
            ``requests.Session``; presumably already logged in - the caller
            is responsible for that.

    Returns:
        The ``data`` dict filled in by ``ChecklistParser``.
    """
    page = session.get('http://gti-binan.dyndns.org:8339/PARENTS_STUDENTS/acad_performance/stud_cur_residency_eval.jsp')
    checklist = ChecklistParser()
    checklist.feed(page.text)
    return checklist.data
def scrape_graderelease(session):
    """Placeholder for scraping grade releases; does nothing, returns None."""
    return None
def main():
    """Prompt for credentials, log in and print the checklist summary.

    Prints ``Login failed!`` when ``portal_login`` reports failure. A
    checklist field that the parser did not find is printed as ``N/A``
    rather than aborting the output part-way through.
    """
    username = input('Username: ')
    password = getpass.getpass()

    # The context manager closes the session (and its pooled connections)
    # on every exit path, including errors.
    with requests.Session() as session:
        if not portal_login(session, username, password):
            print('Login failed!')
            return

        checklist = scrape_checklist(session)
        summary_fields = (
            ('Student ID:', 'student_id'),
            ('Student Name:', 'student_name'),
            ('Course/Major:', 'course'),
            ('Units Required:', 'units_required'),
            ('Year:', 'year'),
            ('Status:', 'status'),
            ('Units Taken:', 'units_taken'),
            ('GWA:', 'gwa'),
        )
        for label, key in summary_fields:
            # print() joins its arguments with a single space, giving
            # "Label: value".
            print(label, checklist.get(key, 'N/A'))

        scrape_graderelease(session)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment