Skip to content

Instantly share code, notes, and snippets.

@xlanor
Created October 3, 2017 07:08
Show Gist options
  • Save xlanor/6f7ef47fc3988c588ef08bb2316826f4 to your computer and use it in GitHub Desktop.
Save xlanor/6f7ef47fc3988c588ef08bb2316826f4 to your computer and use it in GitHub Desktop.
samplescript.py
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from contextlib import closing
from bs4 import BeautifulSoup
import re
# --- Configuration ----------------------------------------------------------
# Login page of the portal.
url = 'https://simconnect.simge.edu.sg/psp/paprd/EMPLOYEE/HRMS/s/WEBLIB_EOPPB.ISCRIPT1.FieldFormula.Iscript_SM_Redirect?cmd=login'
# Enrolment-list page; defined for reference but not requested by this script.
url2 = 'https://simconnect.simge.edu.sg/psp/paprd_2/EMPLOYEE/HRMS/c/SA_LEARNER_SERVICES.SSR_SSENRL_LIST.GBL?Page=SSR_SSENRL_LIST&Action=A&TargetFrameName=None'
# "API" that populates the timetable. It was never designed for this purpose,
# but we are going to use it anyway.
url3 = 'https://simconnect1.simge.edu.sg:444/psc/csprd_2/EMPLOYEE/HRMS/c/SA_LEARNER_SERVICES.SSR_SSENRL_LIST.GBL'
username = 'usn'  # change here (do not commit real credentials)
password = 'pw'  # change here (do not commit real credentials)

# --- Element-id patterns ----------------------------------------------------
# Compiled once here instead of on every row of every subject.
SUBJECT_DIV_ID = re.compile(r'(win2divDERIVED_REGFRM1_DESCR20\$)([0-9]{1})')
MEETING_ROW_ID = re.compile(r'(trCLASS_MTG_VW\$)([0-9]{1})(_row)([0-9]{1})')
# Each field is looked up with a one-digit id pattern first; when the span
# found that way has no text, the two-digit pattern is tried instead (for
# example MTG_DATES$0 versus MTG_DATES$10). The page was probably never meant
# to be consumed like an API, hence the awkward lookups.
DATE_SPAN_IDS = (
    re.compile(r'(MTG_DATES\$)([0-9]{1})'),
    re.compile(r'(MTG_DATES\$)([0-9]{1})([0-9]{1})'),
)
SCHEDULE_SPAN_IDS = (
    re.compile(r'(MTG_SCHED\$)([0-9]{1})'),
    re.compile(r'(MTG_SCHED\$)([0-9]{1})([0-9]{1})'),
)
LOCATION_SPAN_IDS = (
    re.compile(r'(MTG_LOC\$)([0-9]{1})'),
    re.compile(r'(MTG_LOC\$)([0-9]{1})([0-9]{1})'),
)


def first_date(date_text):
    """Return the first date of a ``DD/MM/YYYY - DD/MM/YYYY`` range.

    The end date is treated as redundant: only the part before the first
    ``-`` is kept, with surrounding whitespace removed.
    """
    return date_text.strip().split("-")[0].strip()


def split_schedule(schedule_text):
    """Split a schedule string into ``(start, end)`` clock strings.

    The text is expected to look like ``<2-char day> <start> - <end>``, for
    example ``"Mo 8:30AM - 11:30AM"``. The first two characters (the day
    prefix) are dropped and the remainder is split on ``-``.

    Raises:
        IndexError: if the text contains no ``-`` separator.
    """
    pieces = schedule_text.strip()[2:].split("-")
    return pieces[0].strip(), pieces[1].strip()


def pad_clock(clock_text):
    """Left-pad a clock string shorter than 7 characters with one ``"0"``.

    Presumably this turns ``H:MMAM`` into ``HH:MMAM`` so every time has the
    same width; strings of 7 or more characters are returned unchanged.
    """
    if len(clock_text) < 7:
        return "0" + clock_text
    return clock_text


def meeting_window(day, schedule_text):
    """Return ``(start, end)`` as ``"<day> <clock>"`` strings for one meeting.

    Args:
        day: date string to prefix both times with.
        schedule_text: raw schedule text, see ``split_schedule``.
    """
    start_clock, end_clock = split_schedule(schedule_text)
    return day + " " + pad_clock(start_clock), day + " " + pad_clock(end_clock)


def find_span_text(row, id_patterns):
    """Return the text of the ``span`` in *row* matching one of *id_patterns*.

    Args:
        row: parsed table row (anything offering BeautifulSoup's ``find``).
        id_patterns: ``(primary, fallback)`` compiled id patterns. The
            fallback is only consulted when the primary match has blank text.

    Raises:
        LookupError: if no matching span exists, instead of the opaque
            ``AttributeError`` a ``None`` result would otherwise trigger.
    """
    primary, fallback = id_patterns
    span = row.find("span", {'id': primary})
    if span is not None and not span.text.strip():
        span = row.find("span", {'id': fallback})
    if span is None:
        raise LookupError("no span with id matching " + primary.pattern)
    return span.text


def fetch_timetable_html(driver):
    """Log in through *driver* and return the timetable page source.

    Screenshots are written to ``out.png`` and ``post_login.png`` along the
    way, and fixed sleeps give the pages time to load.
    """
    driver.set_window_size(1124, 850)
    driver.get(url)
    time.sleep(5)  # let the login page finish loading
    driver.save_screenshot('out.png')
    user_box = driver.find_element_by_name('userid')
    password_box = driver.find_element_by_name('pwd')
    login_button = driver.find_element_by_name('Submit')
    user_box.send_keys(username)
    password_box.send_keys(password)
    login_button.click()
    driver.save_screenshot('post_login.png')
    time.sleep(6)  # let the post-login redirect settle
    driver.get(url3)
    return driver.page_source


def print_timetable(soup):
    """Print title, date, start/end time and location of every meeting row."""
    for subject in soup.findAll('div', {'id': SUBJECT_DIV_ID}):
        title_cell = subject.find("td", {'class': 'PAGROUPDIVIDER'})
        print(title_cell.text)
        for row in subject.findAll('tr', {'id': MEETING_ROW_ID}):
            day = first_date(find_span_text(row, DATE_SPAN_IDS))
            print(day)
            # Named schedule_text rather than "time" so the time module is
            # not shadowed.
            schedule_text = find_span_text(row, SCHEDULE_SPAN_IDS)
            start, end = meeting_window(day, schedule_text)
            print('Start Time:' + start)
            print('End Time:' + end)
            print(find_span_text(row, LOCATION_SPAN_IDS))


def main():
    """Scrape the timetable, dump the page to ``testfile.txt`` and print it."""
    # Initialize PhantomJS and define its settings.
    driver = webdriver.PhantomJS(
        executable_path='./phantomjs',
        service_args=['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'],
    )
    try:
        page_html = fetch_timetable_html(driver)
    finally:
        # quit() rather than close(): end the whole browser session even
        # when the login or page fetch raised.
        driver.quit()

    soup = BeautifulSoup(page_html, "html.parser")
    # Keep a pretty-printed copy of the page for debugging; the context
    # manager guarantees the file is flushed and closed.
    with open('testfile.txt', 'w') as dump:
        dump.write(soup.prettify())
    print_timetable(soup)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment