Created
September 29, 2010 22:13
-
-
Save d33tah/603665 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Serves an RSS feed of the notices published on wz.uni.lodz.pl.
BY d33tah, LICENSED UNDER WTFPL.
Requires RSS2Gen: http://www.dalkescientific.com/Python/PyRSS2Gen.html
"""
import datetime
import time
import urllib

import MySQLdb
import PyRSS2Gen
from lxml import html

# Keyword arguments handed to MySQLdb.connect(); the values below are
# placeholders.
db_config = {
    'user': 'some_user',
    'passwd': 'some_password',
    'db': 'some_db',
}
# Uncommenting the import below would replace the placeholder settings
# above with the ones defined in a local `config` module.
#from config import db_config
def _fetch_page(url):
    """Download *url* and return the raw response body as a byte string."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _cached_page(url, now, max_age):
    """Return the body of *url*, served from the MySQL cache when fresh.

    url     -- address to fetch; also used as the cache key.
    now     -- current Unix time in whole seconds.
    max_age -- number of seconds a cached copy may be reused.

    The `cache` table is created on first use. A cached row is reused when
    it is younger than *max_age*; otherwise the page is downloaded again and
    the row is inserted or refreshed. The connection and cursor are always
    closed, even when the download or a query fails.
    """
    conn = MySQLdb.connect(**db_config)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("create table IF NOT EXISTS `cache` (url VARCHAR(256) CHARACTER SET utf8 COLLATE utf8_unicode_ci UNIQUE, value BLOB, lasttime BLOB);")
            cursor.execute("SELECT * FROM cache WHERE url = %s", (url,))
            # Column order follows the CREATE TABLE above:
            # row[0] = url, row[1] = value, row[2] = lasttime.
            row = cursor.fetchone()
            if row and now - int(row[2]) < max_age:
                return row[1]

            page = _fetch_page(url)
            if row:
                # The statement used to be assembled from two literals with
                # no whitespace between the last placeholder and WHERE.
                cursor.execute(
                    "UPDATE cache SET lasttime = %s, value = %s WHERE url = %s",
                    (now, page, url))
            else:
                cursor.execute("INSERT INTO cache VALUES (%s,%s,%s)",
                               (url, page, now))
            conn.commit()
            return page
        finally:
            cursor.close()
    finally:
        conn.close()


def _build_feed(page, url):
    """Parse the notice table out of *page* and return a PyRSS2Gen.RSS2 feed.

    page -- UTF-8 encoded HTML of the search-results page.
    url  -- address the page came from; used as the link of every item.
    """
    tree = html.fromstring(page.decode('utf8'))
    notices = tree.xpath('//table[@id="%s"]//td[@style="width:300px;"]'
                         % 'dnn_ctr558_Search_grvWykladowca')
    rss = PyRSS2Gen.RSS2(
        title="WZ UŁ - ogłoszenia".decode('utf-8'),
        link="http://deetah.jogger.pl".decode('utf-8'),
        description=("Kanał RSS zawiera najnowsze ogłoszenia od wybranych "
                     "wykładowców Wydziału Zarządzania Uniwersytetu Łódzkiego."
                     ).decode('utf-8'),
    )
    for cell in notices:
        # The element preceding each notice cell holds the publication date.
        publish_date = cell.getprevious().text
        notice_text = (cell.text_content()
                       .replace("\r", '')
                       .replace('\n\t' + 12 * ' ', '')  # some HTML align
                       )[4:]  # apparently, there are more spaces here
        one_line = notice_text.replace('\n', '')
        # NOTE(review): the indentation of the original was lost, so it is
        # assumed the length check only guarded the truncated title. Short
        # notices previously had no title assigned at all; they now get the
        # whole text as their title -- confirm this matches the intent.
        if len(notice_text) > 15:
            summary = '[%s] %s (...)' % (publish_date, one_line[:15])
        else:
            summary = '[%s] %s' % (publish_date, one_line)
        rss.items.append(PyRSS2Gen.RSSItem(
            title=summary,
            description=notice_text,
            link=url,
            guid=PyRSS2Gen.Guid(publish_date + notice_text),
            # publish_date is split on '-' and fed to datetime() as integers.
            pubDate=datetime.datetime(
                *[int(part) for part in publish_date.split('-')]),
        ))
    return rss


def application(environ, start_response):
    """WSGI entry point: serve the notices of one lecturer as an RSS feed.

    The raw query string is appended as the `uid` parameter of the faculty
    search page. That page is fetched through a short-lived MySQL cache,
    scraped for notices and re-published as RSS 2.0.

    environ        -- WSGI environment; only QUERY_STRING is read.
    start_response -- WSGI callback used to send the status and headers.

    Returns a one-element list holding the feed XML encoded as iso-8859-2.
    """
    frequency = 3  # for caching purposes: maximum age of a cached page, in seconds
    now = int(time.time())
    # NOTE(review): the query string is untrusted and is forwarded verbatim
    # to the upstream site and used as the cache key -- consider validating
    # that it is a plain numeric id.
    url = ('http://zarzadzanie.uni.lodz.pl/Stronag%c5%82%c3%b3wna/'
           'Wyszukiwarkaog%c5%82osze%c5%84/tabid/169/language/pl-PL/'
           'Default.aspx?uid=' + environ.get("QUERY_STRING", ""))

    page = _cached_page(url, now, frequency)
    rss = _build_feed(page, url)
    body = rss.to_xml(encoding='iso-8859-2')

    start_response('200 OK', [('Content-type', 'application/rss+xml'), ])
    # Wrap the body in a list so the server sends it in one piece instead of
    # iterating over the string one character at a time.
    return [body]
| #print application({"QUERY_STRING":"135"},'') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment