xlanor · October 3, 2017 07:08
diff --git a/samplescript.py b/samplescript.py
 import time
 from selenium import webdriver
 from selenium.webdriver.common.keys import Keys
 from contextlib import closing
 from bs4 import BeautifulSoup
 import re


 url = 'https://simconnect.simge.edu.sg/psp/paprd/EMPLOYEE/HRMS/s/WEBLIB_EOPPB.ISCRIPT1.FieldFormula.Iscript_SM_Redirect?cmd=login'
 url2 = 'https://simconnect.simge.edu.sg/psp/paprd_2/EMPLOYEE/HRMS/c/SA_LEARNER_SERVICES.SSR_SSENRL_LIST.GBL?Page=SSR_SSENRL_LIST&Action=A&TargetFrameName=None'
 url3= 'https://simconnect1.simge.edu.sg:444/psc/csprd_2/EMPLOYEE/HRMS/c/SA_LEARNER_SERVICES.SSR_SSENRL_LIST.GBL' #"API" that populates timetable. It was never designed for this purpose, but we're going to use it
 username = 'usn'#change here
 password = 'pw'#change here

 # Element ids on the timetable page end in "$<index>". Every field is looked
 # up with a one-digit pattern first and a two-digit pattern as a fallback,
 # because the index grows past 9 on longer timetables. The patterns are
 # compiled once here instead of on every loop iteration.
 SUBJECT_DIV_ID = re.compile(r'(win2divDERIVED_REGFRM1_DESCR20\$)([0-9]{1})')
 MEETING_ROW_ID = re.compile(r'(trCLASS_MTG_VW\$)([0-9]{1})(_row)([0-9]{1})')
 DATE_IDS = (
 	re.compile(r'(MTG_DATES\$)([0-9]{1})'),
 	re.compile(r'(MTG_DATES\$)([0-9]{1})([0-9]{1})'),
 )
 SCHED_IDS = (
 	re.compile(r'(MTG_SCHED\$)([0-9]{1})'),
 	re.compile(r'(MTG_SCHED\$)([0-9]{1})([0-9]{1})'),
 )
 LOC_IDS = (
 	re.compile(r'(MTG_LOC\$)([0-9]{1})'),
 	re.compile(r'(MTG_LOC\$)([0-9]{1})([0-9]{1})'),
 )


 def _first_span_text(row, id_patterns):
 	"""Return the text of the first non-blank <span> in row matching id_patterns.

 	The patterns are tried in order. An empty string is returned when no
 	pattern yields a span with visible text, so callers never have to
 	dereference a missing element.
 	"""
 	for pattern in id_patterns:
 		span = row.find("span", {'id': pattern})
 		if span is not None and span.text.strip():
 			return span.text
 	return ''


 def _pad_hour(clock):
 	"""Prefix a short clock string such as '9:00AM' with '0' (-> '09:00AM')."""
 	if len(clock) < 7:
 		return "0" + clock
 	return clock


 #initialize PhantomJS, define settings
 driver = webdriver.PhantomJS(executable_path='./phantomjs',service_args=['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'])
 try:
 	driver.set_window_size(1124, 850)
 	driver.get(url)
 	time.sleep(5) #fixed pauses are used throughout to let each page finish loading
 	driver.save_screenshot('out.png')

 	#locate the login form controls
 	usr = driver.find_element_by_name('userid')
 	passw = driver.find_element_by_name('pwd')
 	logbtn = driver.find_element_by_name('Submit')

 	#login
 	usr.send_keys(username)
 	passw.send_keys(password)
 	logbtn.click()

 	driver.save_screenshot('post_login.png')
 	time.sleep(6)
 	driver.get(url3)
 	soup = BeautifulSoup(driver.page_source,"html.parser")

 	#keep a copy of the fetched markup for debugging; the with-block closes the file
 	with open('testfile.txt','w') as dump:
 		dump.write(soup.prettify())

 	for div in soup.findAll('div',{'id':SUBJECT_DIV_ID}):
 		subjectitle = div.find("td",{'class':'PAGROUPDIVIDER'})
 		if subjectitle is not None:
 			print(subjectitle.text)
 		for row in div.findAll('tr',{'id':MEETING_ROW_ID}):
 			date_text = _first_span_text(row, DATE_IDS)
 			#named schedule_text (not "time") so the time module is not shadowed
 			schedule_text = _first_span_text(row, SCHED_IDS)
 			#a row without a date or without a "start - end" schedule cannot be
 			#turned into a start/end pair, so it is skipped
 			if not date_text or "-" not in schedule_text:
 				continue
 			#date is returned in the format DD/MM/YYYY - DD/MM/YYYY;
 			#only the first date is kept, the end date is redundant
 			strippeddate = date_text.strip().split("-")[0].strip()
 			print(strippeddate)
 			#schedule is returned as DD<space>STARTTIME<AM/PM><space>-<space>ENDTIME<AM/PM>;
 			#drop the two-letter day, then split into [start, end]
 			strippedtime = schedule_text.strip()[2:].split("-")
 			starttime = _pad_hour(strippedtime[0].strip())
 			endtime = _pad_hour(strippedtime[1].strip())
 			print('Start Time:'+ strippeddate + " " + starttime)
 			print('End Time:'+ strippeddate + " " + endtime)
 			print(_first_span_text(row, LOC_IDS))
 finally:
 	#quit() ends the whole browser session even when scraping fails part-way
 	driver.quit()
	import time
	from selenium import webdriver
	from selenium.webdriver.common.keys import Keys
	from contextlib import closing
	from bs4 import BeautifulSoup
	import re


	url = 'https://simconnect.simge.edu.sg/psp/paprd/EMPLOYEE/HRMS/s/WEBLIB_EOPPB.ISCRIPT1.FieldFormula.Iscript_SM_Redirect?cmd=login'
	url2 = 'https://simconnect.simge.edu.sg/psp/paprd_2/EMPLOYEE/HRMS/c/SA_LEARNER_SERVICES.SSR_SSENRL_LIST.GBL?Page=SSR_SSENRL_LIST&Action=A&TargetFrameName=None'
	url3= 'https://simconnect1.simge.edu.sg:444/psc/csprd_2/EMPLOYEE/HRMS/c/SA_LEARNER_SERVICES.SSR_SSENRL_LIST.GBL' #"API" that populates timetable. It was never designed for this purpose, but we're going to use it
	username = 'usn'#change here
	password = 'pw'#change here

	# Element ids on the timetable page end in "$<index>". Every field is looked
	# up with a one-digit pattern first and a two-digit pattern as a fallback,
	# because the index grows past 9 on longer timetables. The patterns are
	# compiled once here instead of on every loop iteration.
	SUBJECT_DIV_ID = re.compile(r'(win2divDERIVED_REGFRM1_DESCR20\$)([0-9]{1})')
	MEETING_ROW_ID = re.compile(r'(trCLASS_MTG_VW\$)([0-9]{1})(_row)([0-9]{1})')
	DATE_IDS = (
		re.compile(r'(MTG_DATES\$)([0-9]{1})'),
		re.compile(r'(MTG_DATES\$)([0-9]{1})([0-9]{1})'),
	)
	SCHED_IDS = (
		re.compile(r'(MTG_SCHED\$)([0-9]{1})'),
		re.compile(r'(MTG_SCHED\$)([0-9]{1})([0-9]{1})'),
	)
	LOC_IDS = (
		re.compile(r'(MTG_LOC\$)([0-9]{1})'),
		re.compile(r'(MTG_LOC\$)([0-9]{1})([0-9]{1})'),
	)


	def _first_span_text(row, id_patterns):
		"""Return the text of the first non-blank <span> in row matching id_patterns.

		The patterns are tried in order. An empty string is returned when no
		pattern yields a span with visible text, so callers never have to
		dereference a missing element.
		"""
		for pattern in id_patterns:
			span = row.find("span", {'id': pattern})
			if span is not None and span.text.strip():
				return span.text
		return ''


	def _pad_hour(clock):
		"""Prefix a short clock string such as '9:00AM' with '0' (-> '09:00AM')."""
		if len(clock) < 7:
			return "0" + clock
		return clock


	#initialize PhantomJS, define settings
	driver = webdriver.PhantomJS(executable_path='./phantomjs',service_args=['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'])
	try:
		driver.set_window_size(1124, 850)
		driver.get(url)
		time.sleep(5) #fixed pauses are used throughout to let each page finish loading
		driver.save_screenshot('out.png')

		#locate the login form controls
		usr = driver.find_element_by_name('userid')
		passw = driver.find_element_by_name('pwd')
		logbtn = driver.find_element_by_name('Submit')

		#login
		usr.send_keys(username)
		passw.send_keys(password)
		logbtn.click()

		driver.save_screenshot('post_login.png')
		time.sleep(6)
		driver.get(url3)
		soup = BeautifulSoup(driver.page_source,"html.parser")

		#keep a copy of the fetched markup for debugging; the with-block closes the file
		with open('testfile.txt','w') as dump:
			dump.write(soup.prettify())

		for div in soup.findAll('div',{'id':SUBJECT_DIV_ID}):
			subjectitle = div.find("td",{'class':'PAGROUPDIVIDER'})
			if subjectitle is not None:
				print(subjectitle.text)
			for row in div.findAll('tr',{'id':MEETING_ROW_ID}):
				date_text = _first_span_text(row, DATE_IDS)
				#named schedule_text (not "time") so the time module is not shadowed
				schedule_text = _first_span_text(row, SCHED_IDS)
				#a row without a date or without a "start - end" schedule cannot be
				#turned into a start/end pair, so it is skipped
				if not date_text or "-" not in schedule_text:
					continue
				#date is returned in the format DD/MM/YYYY - DD/MM/YYYY;
				#only the first date is kept, the end date is redundant
				strippeddate = date_text.strip().split("-")[0].strip()
				print(strippeddate)
				#schedule is returned as DD<space>STARTTIME<AM/PM><space>-<space>ENDTIME<AM/PM>;
				#drop the two-letter day, then split into [start, end]
				strippedtime = schedule_text.strip()[2:].split("-")
				starttime = _pad_hour(strippedtime[0].strip())
				endtime = _pad_hour(strippedtime[1].strip())
				print('Start Time:'+ strippeddate + " " + starttime)
				print('End Time:'+ strippeddate + " " + endtime)
				print(_first_span_text(row, LOC_IDS))
	finally:
		#quit() ends the whole browser session even when scraping fails part-way
		driver.quit()