Created
March 13, 2016 20:12
-
-
Save 0x4C4A/d1564c94e4b05453ec0f to your computer and use it in GitHub Desktop.
Retrieves course names from Riga Technical University course registry when given an array of valid course codes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### This script retrieves the RTU course names corresponding to the given RTU course codes.
### Works as of 13.03.16.
### Coded by Linards Jukmanis - 0x4C4A.com
# Requires Python 2: urllib2 and cookielib (Python 2 standard library) plus the re module.
import contextlib
import cookielib
import re
import urllib2
# ---- User settings ----------------------------------------------------------
# Course codes to look up, one registry query per entry.
# NOTE(review): "FAILTEST101" looks like a deliberately invalid code kept to
# exercise the "has no match!" path - confirm before removing it.
courseCodes = [
    "REA401",
    "FAILTEST101",
    "REA404",
    "RRI597",
    "RTR524",
    "RRE434",
    "RTR519",
    "IDA700",
    "RTR512",
    "REA506",
    "RTR532",
    "RRE542",
    "IRO434",
    "RRI417",
    "REA603",
    "RTR702",
    "REA405",
    "RTR530",
]
# True requests the English course names, False the Latvian ones.
englishNames = True

# ---- Internal state (not meant to be edited) ---------------------------------
# Base URL that every registry request is built from.
linkToDb = 'https://info.rtu.lv/rtupub/'
# Running tallies reported in the final summary line.
successCount = 0
failCount = 0
# One cookie jar shared by all requests, so cookies set by an earlier response
# are sent along with later requests made through `opener`.
cookieJar = cookielib.CookieJar()
cookieHandler = urllib2.HTTPCookieProcessor(cookieJar)
opener = urllib2.build_opener(cookieHandler)
def getPage(url):
    """Fetch `url` through the module-level cookie-aware `opener`.

    Args:
        url: Absolute URL to request.

    Returns:
        The raw response body, exactly as returned by ``response.read()``.

    Raises:
        Whatever ``opener.open`` raises for a failed request (for urllib2
        openers this is ``urllib2.URLError`` or its subclass ``HTTPError``).
    """
    # urllib2 responses are not context managers in Python 2, so closing() is
    # used to guarantee the connection is released even if read() raises.
    with contextlib.closing(opener.open(url)) as response:
        return response.read()
# Main flow: for every course code, query the registry search page, follow the
# first course link found there, extract the course name from the course page
# and print "<code> - <name>". A summary of successes/failures is printed last.

# Query-string fragment selecting the language of the returned pages.
if englishNames:
    paramAppend = "english=true&"
else:
    paramAppend = "english=false&"

# The patterns do not depend on the course code, so compile them once here
# instead of on every loop iteration.
courseLinkPattern = re.compile(r'<a href="(disc2/o\.\d+/[^"]+)')
# The name row is labelled "Name" or "Nosaukums" depending on the page language.
englishNamePattern = re.compile(r'<td class="label">Name<\/td>\s*<td>([^<]+)')
latvianNamePattern = re.compile(r'<td class="label">Nosaukums<\/td>\s*<td>([^<]+)')


def _tryGetPage(url):
    """Return the body of `url`, or None (after reporting it) if the request fails."""
    try:
        return getPage(url)
    except urllib2.URLError as error:
        # HTTPError is a subclass of URLError, so HTTP status failures land here too.
        print("Request to "+url+" failed: "+str(error))
        return None


for code in courseCodes:
    # Find link to course
    # NOTE(review): the code is inserted into the query string unescaped; this
    # assumes course codes are plain alphanumerics - confirm if that can change.
    courseSearchLink = linkToDb+'disc2/listFilter?'+paramAppend+'name='+code
    html = _tryGetPage(courseSearchLink)
    if html is None:
        # A failed request only fails this one course; keep going with the rest.
        failCount += 1
        continue
    link = courseLinkPattern.search(html)
    if link is None:
        print(code+" has no match!")
        failCount += 1
        continue

    # Find course name
    courseLink = linkToDb+link.group(1)
    coursePage = _tryGetPage(courseLink)
    if coursePage is None:
        failCount += 1
        continue
    # Prefer the English label and fall back to the Latvian one; match objects
    # are always truthy, so `or` only falls through when the first search failed.
    nameGroup = englishNamePattern.search(coursePage) or latvianNamePattern.search(coursePage)
    if nameGroup is None:
        print("Couldn't find name for course with code <"+code+"> (link: "+courseLink+")")
        failCount += 1
        continue

    # The captured cell text may contain layout newlines/tabs; drop them.
    courseName = nameGroup.group(1).replace("\n", "").replace("\t", "")
    print(code+" - "+courseName)
    successCount += 1

# Done
allCount = successCount + failCount
print("Done, attempted to retrieve "+str(allCount)+" entries, "
      +str(successCount)+" succeeded, "+str(failCount)+" failed.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment