Skip to content

Instantly share code, notes, and snippets.

@hans
Last active December 21, 2015 17:08
Show Gist options
  • Save hans/6338224 to your computer and use it in GitHub Desktop.
Save hans/6338224 to your computer and use it in GitHub Desktop.
Stanford 2013 courses grouped by GER / WAY requirements

Jump to:

GER: DB-NatSci

GER:DB-EngrAppSci

GER:DB-Hum

GER:DB-Math

GER:DB-SocSci

GER:EC-AmerCul

GER:EC-EthicReas

GER:EC-Gender

GER:EC-GlobalCom

GER:IHUM-1

GER:IHUM-2

GER:IHUM-3

Language

THINK

WAY-AII

WAY-AQR

WAY-CE

WAY-ED

WAY-ER

WAY-FR

WAY-SI

WAY-SMA

Writing 2

Writing SLE

# IPython history, dumped + cleaned up a bit (not tested post-cleanup)
import re
from collections import defaultdict
from xml.sax.saxutils import escape

try:
    from urllib.parse import quote_plus
except ImportError:  # Python 2 keeps quote_plus directly in urllib
    from urllib import quote_plus

import requests
# NOTE(review): nothing in this script references the name `cssselect`
# directly (elements are queried through their .cssselect() method) --
# confirm this import is needed and resolves on the installed lxml.
from lxml.cssselect import cssselect
from lxml.html import document_fromstring
# Printable ExploreCourses listing of every active course that satisfies at
# least one of the WAY requirements selected in the query string.
response = requests.get(
    "http://explorecourses.stanford.edu/print"
    "?filter-ger-WAYSI=on"
    "&filter-ger-WAYFR=on"
    "&filter-coursestatus-Active=on"
    "&filter-ger-WAYAQR=on"
    "&filter-ger-WAYSMA=on"
    "&filter-ger-WAYCE=on"
    "&filter-ger-WAYED=on"
    "&filter-ger-WAYER=on"
    "&filter-ger-WAYAII=on"
    "&q=all+courses"
)
# Fail loudly on a 4xx/5xx answer instead of parsing an error page and
# silently ending up with an empty course list.
response.raise_for_status()
content = response.text
doc = document_fromstring(content)
# One element per course. The absolute path is tied to the page layout at the
# time of writing and will select nothing if the markup changes.
class_els = doc.xpath('/html/body/div[2]/div[2]/div[1]/div[2]/div')
def parseCourse(course_el):
    """Extract the id, title and undergraduate requirements of one course.

    Args:
        course_el: Element for a single course. It must support
            ``cssselect()`` and contain ``span.courseNumber`` and
            ``span.courseTitle`` nodes; a ``.courseAttributes`` node is
            optional.

    Returns:
        A ``(course_id, title, requirements)`` tuple. ``course_id`` is the
        course number text with surrounding whitespace and the trailing
        colon removed, ``title`` is the stripped title text, and
        ``requirements`` is the list of names found after ``UG Reqs:`` in the
        attributes text (empty when the course lists none).

    Raises:
        IndexError: If the number or title span is missing.
    """
    course_id = (course_el.cssselect('span.courseNumber')[0]
                 .text_content().strip().rstrip(':'))
    title = course_el.cssselect('span.courseTitle')[0].text_content().strip()

    attribute_els = course_el.cssselect('.courseAttributes')
    if not attribute_els:
        # No attributes node at all: treat it as "no requirements".
        return course_id, title, []

    # Attributes are '|'-separated fields; the requirements field runs up to
    # the next separator, or to the end of the text when it is the last one.
    reqs_match = re.search(r'UG Reqs: (.+?)\s*(?:\||$)',
                           attribute_els[0].text_content(), re.S)
    if reqs_match is None:
        return course_id, title, []

    names = (name.strip() for name in reqs_match.group(1).split(','))
    # Drop empty tokens left behind by stray or trailing commas.
    return course_id, title, [name for name in names if name]
# Parse every course element into an (id, title, requirement names) triple.
parsed_courses = [parseCourse(e) for e in class_els]

# Index the courses by id -> (title, requirement names). A repeated id keeps
# the last triple parsed.
courses = {
    course_id: (title, course_gers)
    for course_id, title, course_gers in parsed_courses
}

# Invert the index: requirement name -> ids of the courses that list it.
# Iterate over items(): looping over the dict itself yields only the id
# strings, which cannot be unpacked into three values.
gers = defaultdict(list)
for course_id, (_title, course_gers) in courses.items():
    for cg in course_gers:
        gers[cg].append(course_id)
def build_ger_html(ger_name):
    """Render the heading and course list for one requirement.

    Reads the module-level ``gers`` (requirement name -> course ids) and
    ``courses`` (course id -> (title, requirement names)) mappings.

    Args:
        ger_name: Name of the requirement to render, looked up in ``gers``.

    Returns:
        A unicode string: a ``## <name>`` heading followed by a ``<ul>`` with
        one ``<li>`` per course, sorted by course id, each linking to the
        ExploreCourses search for that id.
    """
    item_template = u'<li><a href="http://explorecourses.stanford.edu/search?view=catalog&filter-coursestatus-Active=on&page=0&catalog=&academicYear=&q={}&collapse="><span class="courseId">{}</span> <span class="courseName">{}</span></a></li>'
    els = []
    for course_id in sorted(gers[ger_name]):
        els.append(item_template.format(
            # Ids contain spaces ("CS 106A"), so they must be URL-quoted to
            # form a valid query value. Quoting the UTF-8 bytes works on both
            # Python 2 and Python 3.
            quote_plus(course_id.encode('utf-8')),
            # Ids and titles are scraped text: escape &, < and > so they
            # cannot break the generated markup.
            escape(course_id),
            escape(courses[course_id][0]),
        ))
    return u'\n\n## {}\n<ul>{}</ul>'.format(escape(ger_name), u'\n'.join(els))
# Render every requirement section, ordered by requirement name.
page = u''.join(build_ger_html(ger) for ger in sorted(gers))
# The page is encoded to UTF-8 by hand, so the file has to be opened in binary
# mode: a text-mode ('w') file rejects bytes on Python 3.
with open('stanford_gers_2013.html', 'wb') as f:
    f.write(page.encode('utf-8'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment