Skip to content

Instantly share code, notes, and snippets.

@0x4C4A
Created March 13, 2016 20:12
Show Gist options
  • Save 0x4C4A/d1564c94e4b05453ec0f to your computer and use it in GitHub Desktop.
Save 0x4C4A/d1564c94e4b05453ec0f to your computer and use it in GitHub Desktop.
Retrieves course names from Riga Technical University course registry when given an array of valid course codes
### This script retrieves the RTU course names corresponding to the RTU course codes
### Works as of 13.03.16.
### Coded by Linards Jukmanis - 0x4C4A.com
# Needs Python 2: urllib2 and cookielib are Python 2 standard-library modules; re provides the regular expressions
import urllib2
import cookielib
import re
# Course codes to look up -- edit this list to suit your needs
courseCodes = [
    "REA401",
    "FAILTEST101",
    "REA404",
    "RRI597",
    "RTR524",
    "RRE434",
    "RTR519",
    "IDA700",
    "RTR512",
    "REA506",
    "RTR532",
    "RRE542",
    "IRO434",
    "RRI417",
    "REA603",
    "RTR702",
    "REA405",
    "RTR530",
]

# True -> English course names, False -> Latvian course names
englishNames = True

# --- Internal state below; these shouldn't be changed ---
# Base URL that every request of this script is built on
linkToDb = "https://info.rtu.lv/rtupub/"
# Running tallies of looked-up course codes
successCount = failCount = 0
# One cookie jar behind one opener, so all requests share the same cookies
cookieJar = cookielib.CookieJar()
opener = urllib2.build_opener(
    urllib2.HTTPCookieProcessor(cookieJar)
)
def getPage(url):
    """Fetch ``url`` with the module-level ``opener`` and return the body.

    The response is always closed, even when reading it raises, so the
    underlying connection is released instead of lingering until garbage
    collection.

    Args:
        url: Address of the page to download.

    Returns:
        The response body exactly as returned by ``response.read()``.

    Raises:
        Whatever ``opener.open`` or ``response.read`` raises; nothing is
        caught here.
    """
    response = opener.open(url)
    try:
        return response.read()
    finally:
        # urllib2 responses are not context managers, hence the explicit close
        response.close()
# Query-string fragment that selects the language of the returned pages
paramAppend = "english=true&" if englishNames else "english=false&"
# Look up every code in courseCodes: search the registry for the code, follow
# the first course link in the result and print the name found on that page.
# failCount / successCount are updated along the way.

# The patterns are the same for every code, so compile them once up front.
courseLinkPattern = re.compile(r'<a href="(disc2/o\.\d+/[^"]+)')
# Tried in this order, first hit wins ("Name" / "Nosaukums" are presumably the
# row labels of the English and Latvian page variants).
courseNamePatterns = (
    re.compile(r'<td class="label">Name<\/td>\s*<td>([^<]+)'),
    re.compile(r'<td class="label">Nosaukums<\/td>\s*<td>([^<]+)'),
)

for code in courseCodes:
    # Find link to course
    courseSearchLink = linkToDb+'disc2/listFilter?'+paramAppend+'name='+code
    try:
        html = getPage(courseSearchLink)
    except urllib2.URLError as error:
        # A failed request only fails this code; the remaining codes and the
        # final summary are still processed.
        print(code+" search request failed: "+str(error))
        failCount += 1
        continue
    link = courseLinkPattern.search(html)
    if link is None:
        print(code+" has no match!")
        failCount += 1
        continue

    # Find course name
    courseLink = linkToDb+link.group(1)
    try:
        coursePage = getPage(courseLink)
    except urllib2.URLError as error:
        print(code+" course page request failed: "+str(error)+" (link: "+courseLink+")")
        failCount += 1
        continue
    nameMatch = None
    for pattern in courseNamePatterns:
        nameMatch = pattern.search(coursePage)
        if nameMatch is not None:
            break
    if nameMatch is None:
        print("Couldn't find name for course with code <"+code+"> (link: "+courseLink+")")
        failCount += 1
        continue

    # The captured cell text may span several lines; drop newlines and tabs
    courseName = nameMatch.group(1).replace("\n", "").replace("\t","")
    print(code+" - "+courseName)
    successCount += 1
# Report the totals for the whole run
allCount = successCount + failCount
summaryParts = [
    "Done, attempted to retrieve ", str(allCount), " entries, ",
    str(successCount), " succeeded, ", str(failCount), " failed.",
]
print("".join(summaryParts))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment