Skip to content

Instantly share code, notes, and snippets.

@Inndy
Created August 24, 2014 02:31
Show Gist options
  • Save Inndy/9648bc677c6cec96c14b to your computer and use it in GitHub Desktop.
Save Inndy/9648bc677c6cec96c14b to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import calendar
import re
import sys
import time

import feedparser
import lxml
def strip_html(s):
    """Return ``s`` with every ``<...>`` span removed.

    A span is a ``<``, one or more characters other than ``>``, and the
    closing ``>``.  The text between such spans is kept as-is; character
    entities are not decoded here.
    """
    tag_pattern = re.compile('<[^>]+>')
    return tag_pattern.sub('', s)
def strip_entity(s):
    """Decode the HTML character references found in ``s``.

    Every ``&name;``, ``&#NNN;`` and ``&#xHHH;`` span is replaced:

    * decimal and hexadecimal numeric references become the character with
      that code point;
    * ``amp``, ``gt``, ``lt``, ``quot`` and ``nbsp`` (matched
      case-insensitively) become ``&``, ``>``, ``<``, ``"`` and a space;
    * anything else -- an unknown name, or a numeric reference that is
      malformed or out of range -- is reported on stdout and removed from
      the text.

    Returns the decoded string.
    """
    transform_map = {
        'amp': '&',
        'gt': '>',
        'lt': '<',
        'quot': '"',
        'nbsp': ' ',
    }

    # ``unichr`` only exists on Python 2; on Python 3 ``chr`` does the same
    # job, so pick whichever is available and stay runnable on both.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    def unescape_entity(m):
        name = m.group(1).lower()
        try:
            if name.startswith('#x'):
                return to_char(int(name[2:], 16))
            if name.startswith('#'):
                return to_char(int(name[1:]))
            return transform_map[name]
        except (KeyError, ValueError, OverflowError):
            # KeyError: unknown named entity.  ValueError / OverflowError:
            # non-numeric digits or a code point no character exists for.
            print("Error while decode html entity '{}'".format(m.group(0)))
            # Drop the undecodable reference explicitly rather than relying
            # on re.sub() treating a None replacement as empty.
            return ''

    return re.sub(r'&([#\w]+);', unescape_entity, s)
def smart_break(s, indent=0, max_width=80):
    """Hard-wrap ``s`` so that no line is wider than ``max_width`` columns.

    Width is counted per character: a tab counts as 8 columns, printable
    ASCII (space through ``~``) as 1, and every other character as 2.

    The first line is budgeted as if it already started at column
    ``indent``, but no padding is emitted for it, which leaves room for a
    prefix of that width written by the caller.  Every later line, whether
    it comes from a wrap or from a newline already present in ``s``, is
    prefixed with ``indent`` spaces.

    Breaks are made between characters, not at word boundaries.

    :param s: text to wrap.
    :param indent: columns reserved at the start of every line.
    :param max_width: maximum line width in columns, indent included.
    :returns: the wrapped text.
    """
    def char_width(ch):
        if ch == '\t':
            return 8
        if ' ' <= ch <= '~':
            return 1
        return 2

    line_start = '\n' + ' ' * indent
    pieces = []
    w = indent
    for ch in s:
        if ch == '\n':
            pieces.append(line_start)
            # The new line already holds ``indent`` spaces, so the counter
            # restarts there (not at 0), matching the wrap branch below.
            w = indent
            continue
        w += char_width(ch)
        if w > max_width:
            # This character does not fit: move it to a fresh line.
            pieces.append(line_start)
            w = indent + char_width(ch)
        pieces.append(ch)
    return ''.join(pieces)
# Fetch the notification feed and print one "<local time> - <text>" line per
# entry, wrapping long text under the timestamp column.
# NOTE(review): the feed URL embeds a per-user key; keep the real value out
# of version control.
feed = feedparser.parse("https://www.facebook.com/feeds/notifications.php?id=10000014******3&viewer=0000014******3&key=A***********ZCiC&format=rss20")
entries = feed["entries"]
for entry in entries:
    # feedparser documents its *_parsed dates as UTC struct_time values.
    # calendar.timegm() reads the tuple as UTC, so localtime() then yields
    # the real local time; time.mktime() would have read it as local time.
    published = time.localtime(calendar.timegm(entry['published_parsed']))
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S', published)
    title = entry['summary_detail']['value']
    title = strip_html(title)
    title = strip_entity(title)
    # 22 is the width of the "YYYY-mm-dd HH:MM:SS - " prefix printed below,
    # so wrapped lines line up under the start of the text.
    title = smart_break(title, 22, 84)
    print(u'{time} - {title}'.format(time=timestamp, title=title))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment