Skip to content

Instantly share code, notes, and snippets.

@shazeline
Created November 27, 2013 04:22
Show Gist options
  • Save shazeline/7670630 to your computer and use it in GitHub Desktop.
Save shazeline/7670630 to your computer and use it in GitHub Desktop.
Raw scraper code from the demo. The code is bad and I should feel bad.
import re
import requests
from bs4 import BeautifulSoup
# Root of the schedule site; every page path used in this file is appended to it.
BASE = 'http://www.registrar.ucla.edu/schedule/'


def get_dept_url(term, dept):
    """Return the URL of the course-listing page for one department.

    ``term`` and ``dept`` are substituted verbatim into the query string
    (no URL escaping is applied).
    """
    page = 'crsredir.aspx?termsel=%s&subareasel=%s' % (term, dept)
    return BASE + page
def get_course_url(term, dept, id):
    """Return the URL of the detail page for one course.

    ``term``, ``dept`` and ``id`` are substituted verbatim into the query
    string (no URL escaping is applied).
    """
    # `id` shadows the builtin, but it is part of the call signature and is
    # therefore left unchanged.
    page = 'detselect.aspx?termsel=%s&subareasel=%s&idxcrs=%s' % (term, dept, id)
    return BASE + page
def cell_text(cell):
    """Return the text of ``cell`` as a single string.

    The pieces yielded by ``cell.stripped_strings`` are joined with one
    space between each; no pieces gives the empty string.
    """
    pieces = cell.stripped_strings
    return ' '.join(pieces)
def parse_table(table):
    """Convert an HTML table element into a list of rows of cell text.

    Each ``<tr>`` found under ``table`` becomes one list. Within a row, the
    text of every ``<td>``/``<th>`` is extracted with ``cell_text`` and only
    the cells at even positions (0, 2, 4, ...) are kept.

    Returns a list of lists of strings, one inner list per row; an empty
    list when the table has no rows.
    """
    # Compiled once instead of once per row.
    cell_tag = re.compile('t[dh]')
    data = []
    for row in table.find_all('tr'):
        # Build a real list before slicing: on Python 3 ``map`` returns an
        # iterator, which cannot be sliced.
        texts = [cell_text(cell) for cell in row.find_all(cell_tag)]
        # NOTE(review): presumably the odd-position cells are spacer columns
        # in the page layout -- confirm against the live markup.
        data.append(texts[::2])
    return data
# --- Scrape driver (runs at import time) --------------------------------------
# 1. Read every <option> value from the schedule home page.
# 2. For each department, fetch its page and read the course ids from its
#    <option> values.
# 3. For each course, fetch the detail page, parse the table at index 8 and
#    print columns 1, 2 and 13 of every row after the first.

response = requests.get(BASE + 'schedulehome.aspx')
soup = BeautifulSoup(response.text)
values = [option.get('value') for option in soup.find_all('option')]

# NOTE(review): assumes the first five options are terms, the rest are
# department codes, and that terms[1] is the winter term -- confirm against
# the live page before relying on it.
terms = values[0:5]
dept_codes = values[5:]
winter = terms[1]

dept_urls = [get_dept_url(winter, dept_code) for dept_code in dept_codes]

# Pair each code with its URL directly rather than indexing dept_codes with a
# loop counter that an inner loop could overwrite.
for dept_code, dept_url in zip(dept_codes, dept_urls):
    response = requests.get(dept_url)
    soup = BeautifulSoup(response.text)
    course_ids = [option.get('value') for option in soup.find_all('option')]
    course_urls = [get_course_url(winter, dept_code, course_id)
                   for course_id in course_ids]

    for course_url in course_urls:
        response = requests.get(course_url)
        soup = BeautifulSoup(response.text)
        tables = soup.find_all('table')
        # tables[8] is read below, so at least nine tables are required.
        # NOTE(review): `break` abandons the remaining courses of this
        # department on the first short page -- confirm `continue` was not
        # intended.
        if len(tables) < 9:
            break
        enrollment_table = tables[8]
        parsed_table = parse_table(enrollment_table)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(course_url)
        # The first row is skipped.
        for row in parsed_table[1:]:
            try:
                print(row[1] + ' ' + row[2] + ':\t' + row[13])
            except (IndexError, UnicodeError):
                # Rows with fewer than 14 kept cells, or text the output
                # stream cannot encode, are reported and skipped. Anything
                # else (e.g. KeyboardInterrupt) propagates.
                print('whoops!')
        print('==========================')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment