Skip to content

Instantly share code, notes, and snippets.

@d33tah
Created September 29, 2010 22:13
Show Gist options
  • Select an option

  • Save d33tah/603665 to your computer and use it in GitHub Desktop.

Select an option

Save d33tah/603665 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Serves an RSS feed of the notices published on wz.uni.lodz.pl.
BY d33tah, LICENSED UNDER WTFPL.
Requires PyRSS2Gen: http://www.dalkescientific.com/Python/PyRSS2Gen.html
"""
# Keyword arguments unpacked into MySQLdb.connect(); the values below look
# like placeholders and need to be replaced with real credentials.
db_config = dict(
    user='some_user',
    passwd='some_password',
    db='some_db',
)
import urllib
from lxml import html
import PyRSS2Gen
import datetime
import time
import MySQLdb
#from config import db_config
def application(environ, start_response):
    """WSGI entry point: serve the notice search results as an RSS 2.0 feed.

    The request's query string is appended verbatim as the value of the
    ``uid`` parameter of the notice-search URL. The page is obtained through
    a MySQL-backed cache, the notices are scraped from it and rendered as RSS.

    Args:
        environ: WSGI environment; only ``QUERY_STRING`` is read.
        start_response: WSGI ``start_response`` callable.

    Returns:
        A one-element list holding the feed XML encoded as ISO-8859-2.
    """
    frequency = 3  # seconds a cached copy of the page is considered fresh
    # QUERY_STRING may be absent from environ (PEP 3333), so default to ''.
    # NOTE(review): the query string is client-controlled and is used
    # unescaped both in the fetched URL and as the cache key (VARCHAR(256));
    # consider validating it.
    url = ('http://zarzadzanie.uni.lodz.pl/Stronag%c5%82%c3%b3wna/'
           'Wyszukiwarkaog%c5%82osze%c5%84/tabid/169/language/pl-PL/'
           'Default.aspx?uid=' + environ.get("QUERY_STRING", ""))

    conn = MySQLdb.connect(**db_config)
    try:
        page = _cached_page(conn, url, frequency)
    finally:
        # One connection is opened per request; always release it.
        conn.close()

    body = _notices_to_rss(page, url).to_xml(encoding='iso-8859-2')
    start_response('200 OK', [('Content-type', 'application/rss+xml'), ])
    # Wrap in a list: a bare string would be iterated character by character
    # by the WSGI server.
    return [body]


def _cached_page(conn, url, max_age):
    """Return the raw page body for *url*, refreshing the cache row if stale.

    Args:
        conn: open MySQLdb connection.
        url: address to fetch; also the key of the ``cache`` table.
        max_age: number of seconds a stored copy may be reused.
    """
    now = int(time.time())
    cursor = conn.cursor()
    # Create the cache table on first use.
    cursor.execute("create table IF NOT EXISTS `cache` (url VARCHAR(256) CHARACTER SET utf8 COLLATE utf8_unicode_ci UNIQUE, value BLOB, lasttime BLOB);")
    cursor.execute("SELECT * FROM cache WHERE url = %s", (url,))
    row = cursor.fetchone()  # (url, value, lasttime) or None

    if row and now - int(row[2]) < max_age:
        return row[1]

    response = urllib.urlopen(url)
    try:
        page = response.read()
    finally:
        response.close()

    if row:
        cursor.execute(
            "UPDATE cache SET lasttime = %s, value = %s WHERE url = %s",
            (now, page, url))
    else:
        cursor.execute("INSERT INTO cache VALUES (%s,%s,%s)",
                       (url, page, now))
    conn.commit()
    return page


def _notices_to_rss(page, url):
    """Scrape the notices out of *page* and return them as a PyRSS2Gen.RSS2.

    Args:
        page: UTF-8 encoded HTML of the notice-search page.
        url: address the page came from; used as the link of every item.
    """
    tree = html.fromstring(page.decode('utf8'))
    notices = tree.xpath('//table[@id="%s"]//td[@style="width:300px;"]'
                         % 'dnn_ctr558_Search_grvWykladowca')
    rss = PyRSS2Gen.RSS2(
        title="WZ UŁ - ogłoszenia".decode('utf-8'),
        link="http://deetah.jogger.pl".decode('utf-8'),
        description=("Kanał RSS zawiera najnowsze ogłoszenia od wybranych "
                     "wykładowców Wydziału Zarządzania Uniwersytetu Łódzkiego."
                     ).decode('utf-8'),
    )
    for cell in notices:
        # The publish date is the text of the cell preceding the notice cell.
        publish_date = cell.getprevious().text
        notice_text = (cell.text_content()
                       .replace("\r", '')
                       .replace('\n\t' + 12 * ' ', '')  # HTML alignment padding
                       )[4:]  # drop the four leading characters still left
        # NOTE(review): notices of 15 characters or fewer are skipped; confirm
        # that dropping them (rather than emitting them with an untruncated
        # title) is the intended behaviour.
        if len(notice_text) <= 15:
            continue
        summary = '[%s] %s (...)' % (publish_date,
                                     notice_text.replace('\n', '')[:15])
        rss.items.append(PyRSS2Gen.RSSItem(
            title=summary,
            description=notice_text,
            link=url,
            guid=PyRSS2Gen.Guid(publish_date + notice_text),
            # The date text is split on '-' into the integer fields of a
            # datetime.
            pubDate=datetime.datetime(
                *(int(part) for part in publish_date.split('-'))),
        ))
    return rss
#print application({"QUERY_STRING":"135"},'')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment