alcides · September 8, 2010 19:34
diff --git a/woc_backup.py b/woc_backup.py
 import re
 import urllib
 import urllib2
 import os
 import sys
 import time


 # Credentials come from the environment (WOCU / WOCP) so they never live in
 # the source; both default to the empty string when unset.
 USERNAME = os.environ.get("WOCU","")
 PASSWORD = os.environ.get("WOCP","")
 # Root directory (relative to the working directory) for downloaded files.
 PATH = "woc"

 # Degree programme acronym -> courseId appended to the curricular-plan URL.
 courses_data = {'LEI':'14', 'LDM':'43', 'MEI':'15'}


 def years(i):
    """Return the academic-year label for year index *i* (1 -> "2003-04")."""
    return "%d-%02d" % (2002 + i, 3 + i)


 # Curricular-plan page; a courseId from ``courses_data`` is appended to it.
 url = 'https://woc.uc.pt/dei/course/planocurricular.do?courseId='


 def durl(d, t):
    """Return the download URL for file id *d* of type *t*."""
    return 'https://woc.uc.pt/dei/getFile.do?id=%s&tipo=%s' % (d, t)


 def material(cl, y):
    """Return the course-material page URL for class *cl* in year index *y*."""
    return "https://woc.uc.pt/dei/class/getmaterial.do?idclass=%s&idyear=%s" % (cl, y)


 def avaliation(cl, y):
    """Return the evaluation-material page URL for class *cl* in year index *y*."""
    return "https://woc.uc.pt/dei/class/getmaterialavaliation.do?idclass=%s&idyear=%s" % (cl, y)


 def projects(cl, y):
    """Return the projects page URL for class *cl* in year index *y*."""
    return "https://woc.uc.pt/dei/class/getprojects.do?idclass=%s&idyear=%s" % (cl, y)


 # Content kind -> URL builder taking (class id, year index).
 contents = {'material':material, 'evaluation':avaliation, 'projects': projects}

 # Raw strings keep the regex backslashes literal instead of relying on
 # Python passing unknown string escapes through unchanged.
 # Captures (class_id, year, class_name, semester) from a curricular-plan row.
 c_regex = re.compile(r'/class/getpresentation\.do\?idclass=(?P<class_id>\d{1,3})\&idyear=(?P<year>\d{1,2})">(?P<class_name>[^<]+)</a>[^<]+(?:\<b\>\(\*\)\</b\>[^<]+)?</td>[^<]+<td width="60" align="center">[^<]+<span title="(?P<semester>[1-2])')
 # Captures (download_type, download_id) from each "download" link.
 d_regex = re.compile(r'/dei/getFile\.do\?tipo=(?P<download_type>\d+)&id=(?P<download_id>\d+)">download</a>')
 # Captures the value of the login form's ``checkValue`` field.
 ck_regex = re.compile(r'name="checkValue" value="(?P<ck>[^"]*)"')

 def login():
    """Log in to WOC and return the session cookie for later requests.

    Fetches the landing page to obtain a ``Set-Cookie`` value and the
    ``checkValue`` form value, then posts them together with ``USERNAME``
    and ``PASSWORD`` to the login endpoint.

    Returns:
        The raw ``Set-Cookie`` header value received from the landing page.

    Exits the process with status 1 (after printing ``FAILED``) when no
    cookie was issued or when the response contains the server's
    'O login/password fornecido ' message.
    """
    original = urllib.urlopen("https://woc.uc.pt/dei/")
    try:
        cookie = original.headers.get('Set-Cookie')
        page = original.read()
    finally:
        original.close()

    if cookie is None:
        # Without a cookie every later request would send "cookie: None".
        print('FAILED')
        sys.exit(1)

    # As before, the last checkValue found on the page wins; "" when absent.
    tokens = re.findall(ck_regex, page)
    code = str(tokens[-1]) if tokens else ""

    req = urllib2.Request("https://woc.uc.pt/dei/2moduledefaultlogin.do", headers = {
        "Accept": "application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
        "Content-Type": "application/x-www-form-urlencoded",
        "Origin": "https://woc.uc.pt",
        "Referer": "https://woc.uc.pt/dei/2moduledefaultlogout.do",
        "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-us) AppleWebKit/533.17.8 (KHTML, like Gecko) Version/5.0.1 Safari/533.17.8"
    })
    req.add_header('cookie', cookie)
    data = urllib.urlencode([('password', PASSWORD), ('imageField2.x', "0"), ('imageField2.y', "0"),
                ('username', USERNAME), ('checkValue',code)])
    response = urllib2.urlopen(req, data)
    try:
        content = response.read()
    finally:
        response.close()

    if 'O login/password fornecido ' in content:
        print('FAILED')
        # Non-zero status so shells and cron jobs can detect the failure.
        sys.exit(1)
    return cookie

 # Authenticate once at start-up; request() attaches this value as the
 # 'cookie' header of every later request.
 cookie = login()

 def request(url):
    """Open *url* with the session cookie attached and return the response.

    The response is returned unread, so callers can use both its headers
    and its body.
    """
    authed = urllib2.Request(url, headers={'cookie': cookie})
    return urllib2.urlopen(authed)


 def ensure_dir(f):
    """Create directory *f* (and any missing parents) if it does not exist.

    Attempts the creation directly instead of checking first, so another
    process creating the same path in between cannot cause a failure.

    Raises:
        OSError: for any failure other than the path already existing.
    """
    import errno

    try:
        os.makedirs(f)
    except OSError as e:
        # An existing path is the expected case on every call but the first.
        if e.errno != errno.EEXIST:
            raise

 def download(d):
    """Fetch one file and save it under the name supplied by the server.

    Args:
        d: indexable triple; ``d[1]`` is the URL to fetch and ``d[2]`` the
            directory to save into (created when missing).  ``d[0]`` is
            not used here.

    Raises:
        KeyError: if the response has no ``Content-Disposition`` header.
    """
    req = request(d[1])
    # Assumes the header has the form ``attachment; filename="NAME"``: the
    # slice drops a 22-character prefix and the closing quote -- TODO confirm.
    name = req.headers['Content-Disposition'][22:-1]
    # The name is server-controlled; keep only its final path component so
    # it cannot point outside the target directory.
    name = os.path.basename(name)
    ensure_dir(d[2])
    # Binary mode: the payload is an arbitrary file, not text.
    with open(os.path.join(d[2], name), "wb") as f:
        f.write(req.read())


 def flatten(l):
    """Concatenate a sequence of lists into one flat list (linear time)."""
    return [item for sub in l for item in sub]


 def flap(f, g):
    """Apply *f* to every element of *g* and flatten the resulting lists."""
    return flatten(map(f, g))


 def body(url):
    """Return the response body obtained by ``request(url)``."""
    return request(url).read()


 def clinfo(c, cl):
    """Return one row per class matched by ``c_regex`` on page *cl*.

    Each row is ``[class_id, year, lower-cased class name, semester, c]``.
    """
    return [[class_id, year, class_name.lower(), sem, c]
            for class_id, year, class_name, sem in re.findall(c_regex, body(cl))]


 def clean_name(x):
    """Abbreviate *x* to the upper-cased initials of its words.

    Words of one or two characters are skipped, and so are initials that
    are not alphabetic.
    """
    initials = [word[0].upper() for word in x.split() if len(word) > 2]
    return "".join([ch for ch in initials if ch.isalpha()])

 # One row per class of every configured programme:
 # [class_id, year, lower-cased name, semester, programme].
 courses = flap(lambda c: clinfo(c, url + courses_data[c]), courses_data )

 # (download id, content kind) pairs already fetched.  A set keeps the
 # membership test constant-time however many files accumulate.
 downloaded_ids = set()
 for c in courses:
    # These depend only on the class, so compute them once per class.
    clid = c[0]
    programme = c[4]
    abv = clean_name(c[2])

    for y in range(1,int(c[1])+1):
        # Pause once per class/year before hitting the server again.
        time.sleep(2)
        for tip in contents:
            page_url = contents[tip](clid, y)

            print("%s %s %s %s" % (clid, abv, page_url, programme))
            cnt = str(body(page_url))
            if "camada de dados" in cnt:
                # Presumably the site's data-layer error page -- TODO confirm.
                print("Bug :(")
                continue
            print("Content :)")

            for t,i in re.findall(d_regex, cnt):
                if (i,tip) in downloaded_ids:
                    continue

                # <PATH>/<programme>/<ABBR>_<class id>/<year label>/<kind>
                p = os.path.join(PATH, programme, abv + "_" + clid, years(y), tip)
                d = (i, durl(i,t), p)
                downloaded_ids.add((i,tip))
                print(d)
                download(d)
	import re
	import urllib
	import urllib2
	import os
	import sys
	import time


	# Credentials come from the environment (WOCU / WOCP) so they never live in
	# the source; both default to the empty string when unset.
	USERNAME = os.environ.get("WOCU","")
	PASSWORD = os.environ.get("WOCP","")
	# Root directory (relative to the working directory) for downloaded files.
	PATH = "woc"

	# Degree programme acronym -> courseId appended to the curricular-plan URL.
	courses_data = {'LEI':'14', 'LDM':'43', 'MEI':'15'}


	def years(i):
	    """Return the academic-year label for year index *i* (1 -> "2003-04")."""
	    return "%d-%02d" % (2002 + i, 3 + i)


	# Curricular-plan page; a courseId from ``courses_data`` is appended to it.
	url = 'https://woc.uc.pt/dei/course/planocurricular.do?courseId='


	def durl(d, t):
	    """Return the download URL for file id *d* of type *t*."""
	    return 'https://woc.uc.pt/dei/getFile.do?id=%s&tipo=%s' % (d, t)


	def material(cl, y):
	    """Return the course-material page URL for class *cl* in year index *y*."""
	    return "https://woc.uc.pt/dei/class/getmaterial.do?idclass=%s&idyear=%s" % (cl, y)


	def avaliation(cl, y):
	    """Return the evaluation-material page URL for class *cl* in year index *y*."""
	    return "https://woc.uc.pt/dei/class/getmaterialavaliation.do?idclass=%s&idyear=%s" % (cl, y)


	def projects(cl, y):
	    """Return the projects page URL for class *cl* in year index *y*."""
	    return "https://woc.uc.pt/dei/class/getprojects.do?idclass=%s&idyear=%s" % (cl, y)


	# Content kind -> URL builder taking (class id, year index).
	contents = {'material':material, 'evaluation':avaliation, 'projects': projects}

	# Raw strings keep the regex backslashes literal instead of relying on
	# Python passing unknown string escapes through unchanged.
	# Captures (class_id, year, class_name, semester) from a curricular-plan row.
	c_regex = re.compile(r'/class/getpresentation\.do\?idclass=(?P<class_id>\d{1,3})\&idyear=(?P<year>\d{1,2})">(?P<class_name>[^<]+)</a>[^<]+(?:\<b\>\(\*\)\</b\>[^<]+)?</td>[^<]+<td width="60" align="center">[^<]+<span title="(?P<semester>[1-2])')
	# Captures (download_type, download_id) from each "download" link.
	d_regex = re.compile(r'/dei/getFile\.do\?tipo=(?P<download_type>\d+)&id=(?P<download_id>\d+)">download</a>')
	# Captures the value of the login form's ``checkValue`` field.
	ck_regex = re.compile(r'name="checkValue" value="(?P<ck>[^"]*)"')

	def login():
	    """Log in to WOC and return the session cookie for later requests.

	    Fetches the landing page to obtain a ``Set-Cookie`` value and the
	    ``checkValue`` form value, then posts them together with ``USERNAME``
	    and ``PASSWORD`` to the login endpoint.

	    Returns:
	        The raw ``Set-Cookie`` header value received from the landing page.

	    Exits the process with status 1 (after printing ``FAILED``) when no
	    cookie was issued or when the response contains the server's
	    'O login/password fornecido ' message.
	    """
	    original = urllib.urlopen("https://woc.uc.pt/dei/")
	    try:
	        cookie = original.headers.get('Set-Cookie')
	        page = original.read()
	    finally:
	        original.close()

	    if cookie is None:
	        # Without a cookie every later request would send "cookie: None".
	        print('FAILED')
	        sys.exit(1)

	    # As before, the last checkValue found on the page wins; "" when absent.
	    tokens = re.findall(ck_regex, page)
	    code = str(tokens[-1]) if tokens else ""

	    req = urllib2.Request("https://woc.uc.pt/dei/2moduledefaultlogin.do", headers = {
	        # The catch-all media range is "*/*"; a bare "/" is not a valid one.
	        "Accept": "application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
	        "Content-Type": "application/x-www-form-urlencoded",
	        "Origin": "https://woc.uc.pt",
	        "Referer": "https://woc.uc.pt/dei/2moduledefaultlogout.do",
	        "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-us) AppleWebKit/533.17.8 (KHTML, like Gecko) Version/5.0.1 Safari/533.17.8"
	    })
	    req.add_header('cookie', cookie)
	    data = urllib.urlencode([('password', PASSWORD), ('imageField2.x', "0"), ('imageField2.y', "0"),
	                ('username', USERNAME), ('checkValue',code)])
	    response = urllib2.urlopen(req, data)
	    try:
	        content = response.read()
	    finally:
	        response.close()

	    if 'O login/password fornecido ' in content:
	        print('FAILED')
	        # Non-zero status so shells and cron jobs can detect the failure.
	        sys.exit(1)
	    return cookie

	# Authenticate once at start-up; request() attaches this value as the
	# 'cookie' header of every later request.
	cookie = login()

	def request(url):
	    """Open *url* with the session cookie attached and return the response.

	    The response is returned unread, so callers can use both its headers
	    and its body.
	    """
	    req = urllib2.Request(url)
	    req.add_header('cookie', cookie)
	    response = urllib2.urlopen(req)
	    return response


	def ensure_dir(f):
	    """Create directory *f* (and any missing parents) if it does not exist.

	    Attempts the creation directly instead of checking first, so another
	    process creating the same path in between cannot cause a failure.

	    Raises:
	        OSError: for any failure other than the path already existing.
	    """
	    import errno

	    try:
	        os.makedirs(f)
	    except OSError as e:
	        # An existing path is the expected case on every call but the first.
	        if e.errno != errno.EEXIST:
	            raise

	def download(d):
	    """Fetch one file and save it under the name supplied by the server.

	    Args:
	        d: indexable triple; ``d[1]`` is the URL to fetch and ``d[2]`` the
	            directory to save into (created when missing).  ``d[0]`` is
	            not used here.

	    Raises:
	        KeyError: if the response has no ``Content-Disposition`` header.
	    """
	    req = request(d[1])
	    # Assumes the header has the form ``attachment; filename="NAME"``: the
	    # slice drops a 22-character prefix and the closing quote -- TODO confirm.
	    name = req.headers['Content-Disposition'][22:-1]
	    # The name is server-controlled; keep only its final path component so
	    # it cannot point outside the target directory.
	    name = os.path.basename(name)
	    ensure_dir(d[2])
	    # Binary mode: the payload is an arbitrary file, not text.
	    with open(os.path.join(d[2], name), "wb") as f:
	        f.write(req.read())


	def flatten(l):
	    """Concatenate a sequence of lists into one flat list (linear time)."""
	    return [item for sub in l for item in sub]


	def flap(f, g):
	    """Apply *f* to every element of *g* and flatten the resulting lists."""
	    return flatten(map(f, g))


	def body(url):
	    """Return the response body obtained by ``request(url)``."""
	    return request(url).read()


	def clinfo(c, cl):
	    """Return one row per class matched by ``c_regex`` on page *cl*.

	    Each row is ``[class_id, year, lower-cased class name, semester, c]``.
	    """
	    return [[class_id, year, class_name.lower(), sem, c]
	            for class_id, year, class_name, sem in re.findall(c_regex, body(cl))]


	def clean_name(x):
	    """Abbreviate *x* to the upper-cased initials of its words.

	    Words of one or two characters are skipped, and so are initials that
	    are not alphabetic.
	    """
	    initials = [word[0].upper() for word in x.split() if len(word) > 2]
	    return "".join([ch for ch in initials if ch.isalpha()])

	# One row per class of every configured programme:
	# [class_id, year, lower-cased name, semester, programme].
	courses = flap(lambda c: clinfo(c, url + courses_data[c]), courses_data )

	# (download id, content kind) pairs already fetched.  A set keeps the
	# membership test constant-time however many files accumulate.
	downloaded_ids = set()
	for c in courses:
	    # These depend only on the class, so compute them once per class.
	    clid = c[0]
	    programme = c[4]
	    abv = clean_name(c[2])

	    for y in range(1,int(c[1])+1):
	        # Pause once per class/year before hitting the server again.
	        time.sleep(2)
	        for tip in contents:
	            page_url = contents[tip](clid, y)

	            print("%s %s %s %s" % (clid, abv, page_url, programme))
	            cnt = str(body(page_url))
	            if "camada de dados" in cnt:
	                # Presumably the site's data-layer error page -- TODO confirm.
	                print("Bug :(")
	                continue
	            print("Content :)")

	            for t,i in re.findall(d_regex, cnt):
	                if (i,tip) in downloaded_ids:
	                    continue

	                # <PATH>/<programme>/<ABBR>_<class id>/<year label>/<kind>
	                p = os.path.join(PATH, programme, abv + "_" + clid, years(y), tip)
	                d = (i, durl(i,t), p)
	                downloaded_ids.add((i,tip))
	                print(d)
	                download(d)