Skip to content

Instantly share code, notes, and snippets.

@pylemon
Created February 26, 2014 10:31
Show Gist options
  • Save pylemon/9227278 to your computer and use it in GitHub Desktop.
import feedparser
import re
import time
# Atom activity-stream feed of the JIRA (Atlassian) instance; capped at 5 items.
URL = 'https://waimaichaoren.atlassian.net/activity?maxResults=5&os_authType=basic'
# Session cookie string copied from the browser; empty by default — fill in
# before running, since the feed requires an authenticated session.
COOKIE_STR = """"""
# Extra HTTP headers sent with every feed request.
REQUEST_HEADER = {
'Cookie': COOKIE_STR,
}
# How long to wait between successive polls of the feed.
CHECKUP_INTERVAL = 10 # seconds
def clean_html(raw_html):
    """Return *raw_html* with every HTML tag (non-greedy <...> span) removed."""
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub('', raw_html)
def get_stream_data(url, max_items):
    """Fetch the activity feed at *url* and return up to *max_items* entries.

    Each entry is returned as a dict with 'id', 'title' and 'summary' keys;
    the title and summary have their HTML tags stripped via clean_html().
    """
    # BUG FIX: the original ignored the `url` parameter and always fetched
    # the module-level URL constant.
    feed = feedparser.parse(url, request_headers=REQUEST_HEADER)
    result = []
    for entry in feed.entries[:max_items]:
        result.append({
            'id': entry.id,  # renamed local to avoid shadowing builtin `id`
            'title': clean_html(entry.title),
            'summary': clean_html(entry.summary),
        })
    return result
if __name__ == '__main__':
feed_dict = {}
while 1:
data = get_stream_data(URL, 5)
for d in data:
if feed_dict.get(d['id']):
continue
else:
print '>>> ', d['title']
print d['summary'], '\n\n'
feed_dict[d['id']] = d
time.sleep(CHECKUP_INTERVAL)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment