Created
August 24, 2014 02:31
-
-
Save Inndy/9648bc677c6cec96c14b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import feedparser, time, sys, re, lxml | |
def strip_html(s):
    """Return *s* with every ``<...>`` tag-like span removed.

    Anything from a ``<`` up to the next ``>`` (with at least one character
    in between) is deleted; text outside such spans is kept as-is.  This is
    a plain regex strip, not an HTML parser.
    """
    return re.sub(r'<[^>]+>', '', s)
def strip_entity(s):
    """Replace HTML character references in *s* with the characters they name.

    Handles decimal (``&#65;``) and hexadecimal (``&#x41;``) numeric
    references plus the named entities ``amp``, ``gt``, ``lt``, ``quot`` and
    ``nbsp`` (rendered as a plain space).  Names are matched
    case-insensitively.  A reference that cannot be decoded -- an unknown
    name, a malformed number, or a code point the interpreter cannot
    represent -- is reported on stdout and left in the text unchanged.

    Returns the decoded string.
    """
    named_entities = {
        'amp': '&',
        'gt': '>',
        'lt': '<',
        'quot': '"',
        'nbsp': ' ',
    }
    try:
        to_char = unichr  # Python 2 spelling of "code point -> character"
    except NameError:
        to_char = chr  # Python 3: chr() already covers all code points

    def unescape_entity(m):
        name = m.group(1).lower()
        try:
            if name.startswith('#x'):
                return to_char(int(name[2:], 16))
            if name.startswith('#'):
                return to_char(int(name[1:]))
            return named_entities[name]
        except (KeyError, ValueError, OverflowError):
            # Keep the undecodable reference verbatim instead of dropping it.
            print("Error while decode html entity '{}'".format(m.group(0)))
            return m.group(0)

    return re.sub(r'&([#\w]+);', unescape_entity, s)
def smart_break(s, indent=0, max_width=80):
    """Hard-wrap *s* so that no output line is wider than *max_width* columns.

    The text is broken character by character (not at word boundaries).
    Every line started by this function -- whether from wrapping or from a
    newline already present in *s* -- is prefixed with *indent* spaces.  The
    first line gets no prefix but is measured as if *indent* columns were
    already occupied, so the caller can print its own prefix of that width
    in front of the result.

    Column widths: a tab counts as 8, printable ASCII (space through ``~``)
    as 1, and every other character as 2 (presumably to approximate
    double-width CJK glyphs).

    Returns the wrapped string.
    """
    def char_width(ch):
        if ch == '\t':
            return 8
        if ' ' <= ch <= '~':
            return 1
        return 2

    line_start = '\n' + ' ' * indent
    pieces = []
    w = indent
    for ch in s:
        if ch == '\n':
            pieces.append(line_start)
            # The indent just emitted occupies columns on the new line too.
            w = indent
            continue
        cw = char_width(ch)
        w += cw
        if w > max_width:
            # This character does not fit: move it to a fresh indented line.
            pieces.append(line_start)
            w = indent + cw
        pieces.append(ch)
    return ''.join(pieces)
# Fetch the Facebook notifications RSS feed and print one line per entry in
# the form "<local time> - <summary text>", with the text wrapped and its
# continuation lines aligned under the text column.
import calendar

feed = feedparser.parse("https://www.facebook.com/feeds/notifications.php?id=10000014******3&viewer=0000014******3&key=A***********ZCiC&format=rss20")
entries = feed["entries"]
for entry in entries:
    # feedparser documents its *_parsed dates as UTC time tuples, so they
    # must be read with timegm(); mktime() would misread them as local time.
    timestamp = time.localtime(calendar.timegm(entry['published_parsed']))
    title = entry['summary_detail']['value']
    title = strip_html(title)
    title = strip_entity(title)
    # 22 columns = width of the 'YYYY-MM-DD HH:MM:SS - ' prefix printed below.
    title = smart_break(title, 22, 84)
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S', timestamp)
    print(u'{time} - {title}'.format(time=timestamp, title=title))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment