Created
October 3, 2017 07:08
-
-
Save xlanor/6f7ef47fc3988c588ef08bb2316826f4 to your computer and use it in GitHub Desktop.
samplescript.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import time
from contextlib import closing

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Scrapes the SIMConnect class timetable: logs in through a headless
# PhantomJS browser, loads the enrolment-list page, dumps the prettified HTML
# to testfile.txt and prints every meeting (date, start, end, location)
# grouped by subject.

url = 'https://simconnect.simge.edu.sg/psp/paprd/EMPLOYEE/HRMS/s/WEBLIB_EOPPB.ISCRIPT1.FieldFormula.Iscript_SM_Redirect?cmd=login'
url2 = 'https://simconnect.simge.edu.sg/psp/paprd_2/EMPLOYEE/HRMS/c/SA_LEARNER_SERVICES.SSR_SSENRL_LIST.GBL?Page=SSR_SSENRL_LIST&Action=A&TargetFrameName=None'
# "API" that populates the timetable. It was never designed for this purpose,
# but it is the page this script scrapes.
url3 = 'https://simconnect1.simge.edu.sg:444/psc/csprd_2/EMPLOYEE/HRMS/c/SA_LEARNER_SERVICES.SSR_SSENRL_LIST.GBL'
username = 'usn'  # change here
password = 'pw'  # change here

# Element-id patterns. BeautifulSoup applies a compiled pattern with search(),
# so one `\d+` pattern covers single- and multi-digit indices alike; no
# separate "two digit" fallback lookup is needed.
SUBJECT_DIV_ID = re.compile(r'win2divDERIVED_REGFRM1_DESCR20\$\d+')
MEETING_ROW_ID = re.compile(r'trCLASS_MTG_VW\$\d+_row\d+')
MEETING_DATE_ID = re.compile(r'MTG_DATES\$\d+')
MEETING_TIME_ID = re.compile(r'MTG_SCHED\$\d+')
MEETING_LOCATION_ID = re.compile(r'MTG_LOC\$\d+')


def first_date(date_range):
    """Return the first date of a 'DD/MM/YYYY - DD/MM/YYYY' range.

    The end date is redundant for single-day meetings, so only the part
    before the first '-' is kept, with surrounding whitespace removed.
    Returns '' for blank input.
    """
    return date_range.strip().split("-")[0].strip()


def pad_clock(clock):
    """Prefix a single '0' to clock strings shorter than 7 characters.

    Turns '8:30AM' into '08:30AM' so all times share the 'HH:MMAM' width.
    """
    if len(clock) < 7:
        return "0" + clock
    return clock


def split_schedule(schedule):
    """Split 'DD START - END' (e.g. 'Mo 8:30AM - 11:30AM') into padded times.

    The leading two-character day code is dropped. Returns a
    (start, end) tuple, or None when the text does not contain both times.
    """
    parts = schedule.strip()[2:].split("-")
    if len(parts) < 2:
        return None
    start = parts[0].strip()
    end = parts[1].strip()
    if not start or not end:
        return None
    return pad_clock(start), pad_clock(end)


def span_text(row, id_pattern):
    """Return the text of the first <span> in row whose id matches, or ''."""
    span = row.find("span", {'id': id_pattern})
    if span is None:
        return ""
    return span.text


def print_timetable(soup):
    """Print every subject title followed by its meetings.

    For each meeting row the date, 'Start Time:', 'End Time:' and location
    are printed. Rows without a usable date or schedule are skipped instead
    of raising.
    """
    for subject in soup.find_all('div', {'id': SUBJECT_DIV_ID}):
        title_cell = subject.find("td", {'class': 'PAGROUPDIVIDER'})
        if title_cell is not None:
            print(title_cell.text)
        for row in subject.find_all('tr', {'id': MEETING_ROW_ID}):
            meeting_date = first_date(span_text(row, MEETING_DATE_ID))
            clock_times = split_schedule(span_text(row, MEETING_TIME_ID))
            if not meeting_date or clock_times is None:
                continue
            start_clock, end_clock = clock_times
            print(meeting_date)
            print('Start Time:' + meeting_date + " " + start_clock)
            print('End Time:' + meeting_date + " " + end_clock)
            print(span_text(row, MEETING_LOCATION_ID))


def fetch_timetable_html():
    """Log in with PhantomJS and return the timetable page source.

    Screenshots are saved before and after login (out.png, post_login.png).
    The browser is always shut down, even when a step raises.
    """
    # initialize PhantomJS, define settings
    driver = webdriver.PhantomJS(executable_path='./phantomjs', service_args=['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'])
    try:
        driver.set_window_size(1124, 850)
        driver.get(url)
        time.sleep(5)  # fixed sleeps give each page time to load
        driver.save_screenshot('out.png')
        user_box = driver.find_element_by_name('userid')
        password_box = driver.find_element_by_name('pwd')
        login_button = driver.find_element_by_name('Submit')
        user_box.send_keys(username)
        password_box.send_keys(password)
        login_button.click()
        driver.save_screenshot('post_login.png')
        time.sleep(6)
        driver.get(url3)
        return driver.page_source
    finally:
        # quit() ends the whole PhantomJS process; close() only closes the window.
        driver.quit()


def main():
    """Fetch the timetable page, dump it to testfile.txt and print it."""
    soup = BeautifulSoup(fetch_timetable_html(), "html.parser")
    with open('testfile.txt', 'w') as dump:
        dump.write(soup.prettify())
    print_timetable(soup)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment