Skip to content

Instantly share code, notes, and snippets.

@alcides
Created September 8, 2010 19:34
Show Gist options
  • Save alcides/570687 to your computer and use it in GitHub Desktop.
Save alcides/570687 to your computer and use it in GitHub Desktop.
import re
import urllib
import urllib2
import os
import sys
import time
# Credentials come from the environment (WOCU / WOCP); both default to "".
USERNAME = os.environ.get("WOCU", "")
PASSWORD = os.environ.get("WOCP", "")

# Root directory under which downloaded files are stored.
PATH = "woc"

# Programme acronym -> id appended to the curricular-plan URL below.
courses_data = {'LEI': '14', 'LDM': '43', 'MEI': '15'}

# Curricular-plan page; a value from courses_data is appended as courseId.
url = 'https://woc.uc.pt/dei/course/planocurricular.do?courseId='


def years(i):
    """Format year index *i* as a "YYYY-NN" label, e.g. 1 -> "2003-04"."""
    return "%d-%02d" % (2002 + i, 3 + i)


def durl(d, t):
    """Return the file download URL for id *d* and type *t*."""
    return 'https://woc.uc.pt/dei/getFile.do?id=%s&tipo=%s' % (d, t)


def material(cl, y):
    """Return the "material" page URL for class *cl* and year index *y*."""
    return "https://woc.uc.pt/dei/class/getmaterial.do?idclass=%s&idyear=%s" % (cl, y)


def avaliation(cl, y):
    """Return the "material avaliation" page URL for class *cl*, year *y*."""
    return "https://woc.uc.pt/dei/class/getmaterialavaliation.do?idclass=%s&idyear=%s" % (cl, y)


def projects(cl, y):
    """Return the "projects" page URL for class *cl* and year index *y*."""
    return "https://woc.uc.pt/dei/class/getprojects.do?idclass=%s&idyear=%s" % (cl, y)


# Content kind (also used as a directory name) -> URL builder for that kind.
contents = {'material': material, 'evaluation': avaliation, 'projects': projects}

# Raw strings keep the regex escapes (\. \d \& \< ...) out of Python's own
# string-escape processing; the compiled patterns are unchanged.

# Captures class_id, year, class_name and semester (1 or 2) from a class link
# and the table cell that follows it.
c_regex = re.compile(
    r'/class/getpresentation\.do\?idclass=(?P<class_id>\d{1,3})'
    r'\&idyear=(?P<year>\d{1,2})">(?P<class_name>[^<]+)</a>[^<]+'
    r'(?:\<b\>\(\*\)\</b\>[^<]+)?</td>[^<]+'
    r'<td width="60" align="center">[^<]+<span title="(?P<semester>[1-2])'
)

# Captures (download_type, download_id) from every "download" link.
d_regex = re.compile(r'/dei/getFile\.do\?tipo=(?P<download_type>\d+)&id=(?P<download_id>\d+)">download</a>')

# Captures the value of the "checkValue" form field.
ck_regex = re.compile(r'name="checkValue" value="(?P<ck>[^"]*)"')
def login():
    """Log in to the WOC site and return the session cookie string.

    The landing page is fetched first to obtain its ``Set-Cookie`` header
    and the ``checkValue`` form field; both are then sent along with
    USERNAME and PASSWORD to the login endpoint.

    Returns:
        The ``Set-Cookie`` header value of the landing page (``None`` if the
        server sent none).

    Raises:
        SystemExit: with status 1, after printing ``FAILED``, when the login
            response contains the site's rejection text.
    """
    original = urllib.urlopen("https://woc.uc.pt/dei/")
    try:
        cookie = original.headers.get('Set-Cookie')
        page = original.read()
    finally:
        original.close()

    # If the field appears more than once, the last occurrence is used.
    tokens = re.findall(ck_regex, page)
    code = str(tokens[-1]) if tokens else ""

    req = urllib2.Request("https://woc.uc.pt/dei/2moduledefaultlogin.do", headers={
        "Accept": "application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
        "Content-Type": "application/x-www-form-urlencoded",
        "Origin": "https://woc.uc.pt",
        "Referer": "https://woc.uc.pt/dei/2moduledefaultlogout.do",
        "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-us) AppleWebKit/533.17.8 (KHTML, like Gecko) Version/5.0.1 Safari/533.17.8"
    })
    req.add_header('cookie', cookie)
    data = urllib.urlencode([
        ('password', PASSWORD),
        ('imageField2.x', "0"),
        ('imageField2.y', "0"),
        ('username', USERNAME),
        ('checkValue', code),
    ])

    response = urllib2.urlopen(req, data)
    try:
        content = response.read()
    finally:
        response.close()

    if 'O login/password fornecido ' in content:
        print('FAILED')
        # Non-zero status so shells and schedulers can detect the failure.
        sys.exit(1)
    return cookie
# Log in once at module load; request() sends this value as its cookie header.
cookie = login()
def request(url):
    """Open *url* with the module-level session cookie and return the response."""
    authed = urllib2.Request(url, headers={'cookie': cookie})
    return urllib2.urlopen(authed)
def ensure_dir(f):
    """Create directory *f*, including missing parents, if it does not exist.

    An already existing path is left untouched. Any other failure from
    ``os.makedirs`` is re-raised as ``OSError``.
    """
    import errno  # local import: errno is not imported at file level

    # Try first and tolerate EEXIST instead of checking os.path.exists()
    # beforehand, so a directory created in between does not cause a failure.
    try:
        os.makedirs(f)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise
def download(d):
    """Fetch one file and save it under the name given by the server.

    Args:
        d: a sequence whose item 1 is the URL to fetch and whose item 2 is
            the directory to save into (created if missing).

    The lookup of the ``Content-Disposition`` header fails if the response
    does not carry one.
    """
    req = request(d[1])
    try:
        # Assumes the header looks like ``attachment; filename="NAME"``: the
        # slice drops the 22-character prefix and the closing quote.
        name = req.headers['Content-Disposition'][22:-1]
        # The name is server-supplied; keep only its last path component so
        # it cannot point outside the target directory.
        name = os.path.basename(name)
        ensure_dir(d[2])
        # Binary mode: the payload is written exactly as received.
        with open(os.path.join(d[2], name), "wb") as f:
            f.write(req.read())
    finally:
        req.close()
def flatten(l):
    """Concatenate the sub-sequences of *l* into a single flat list."""
    # A comprehension is linear; sum(l, []) copies the accumulator each step.
    return [item for sub in l for item in sub]


def flap(f, g):
    """Apply *f* to each element of *g* and flatten the resulting lists."""
    return flatten(map(f, g))


def body(url):
    """Fetch *url* through request() and return the response body."""
    return request(url).read()


def clinfo(c, cl):
    """List the classes found on page *cl*, tagging each with *c*.

    Returns one ``[class_id, year, lower-cased class_name, semester, c]``
    list per match of ``c_regex`` in the page body.
    """
    return [
        [class_id, year, class_name.lower(), sem, c]
        for class_id, year, class_name, sem in re.findall(c_regex, body(cl))
    ]


def clean_name(x):
    """Build an acronym from *x*.

    Takes the upper-cased first character of every whitespace-separated word
    longer than two characters and keeps only the alphabetic ones.
    """
    initials = [word[0].upper() for word in x.split() if len(word) > 2]
    return "".join(ch for ch in initials if ch.isalpha())
# One [class_id, year, class_name, semester, programme] entry per class found
# on each programme's curricular-plan page.
courses = flap(lambda c: clinfo(c, url + courses_data[c]), courses_data)

# (download_id, content_kind) pairs already fetched; a set keeps the
# membership test constant-time however many files have been seen.
downloaded_ids = set()

for c in courses:
    # These depend only on the course, so compute them once per course.
    clid = c[0]
    programme = c[4]
    abv = clean_name(c[2])
    for y in range(1, int(c[1]) + 1):
        # Pause between year pages (presumably to go easy on the server).
        time.sleep(2)
        for tip in contents:
            f = contents[tip]
            page_url = f(clid, y)
            print("%s %s %s %s" % (clid, abv, page_url, programme))
            cnt = str(body(page_url))
            if "camada de dados" in cnt:
                # The page contains the site's "camada de dados" error text.
                print("Bug :(")
                continue
            print("Content :)")
            # d_regex yields (download_type, download_id) for each link.
            for t, i in re.findall(d_regex, cnt):
                if (i, tip) in downloaded_ids:
                    continue
                p = os.path.join(PATH, programme, abv + "_" + clid, years(y), tip)
                d = (i, durl(i, t), p)
                downloaded_ids.add((i, tip))
                print(d)
                download(d)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment