FriendFeed backup tool with no package dependencies. Just download it and run it like: ./filika bret
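For example, combining the -l/--limit option defined in main() with the feed id (the 300 below is just an illustrative number), a run could look like this, assuming the script is saved as filika and marked executable:

./filika -l 300 bret

This backs up at most 300 entries of the public feed "bret" and writes everything into a bret/ folder under the current working directory.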
#!/usr/bin/env python

__author__ = "Mirat Can Bayrak"
__email__ = "[email protected]"
__copyright__ = "Copyright 2015, Planet Earth"

ENTRIES_PER_PAGE = 100

import re
import logging

from optparse import OptionParser
from urllib2 import urlparse, urlopen, URLError, HTTPError
from os.path import join, dirname, exists
from os import mkdir, getcwd
from json import load, loads, dump
from urllib import urlencode

_loads = lambda string: loads(string.decode("utf-8"))

logging.basicConfig(format='%(levelname)s: %(message)s',
                    level=logging.INFO)

BODY_T = u"""
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>Backup of %(name)s</title>
<meta http-equiv="content-type" content="text/html;charset=utf-8" />
<style>
body{font-family:Arial,sans-serif;font-size:15px;background: #ddd;}div.feed{background:#fff; width:800px;border:1px solid #ccc;margin:0 auto;padding:1em}div.entry{margin:0 0 2em 0}div.body{margin-bottom: 1em;}div.info{font-size:.7em}div.likes{background:top left no-repeat url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAnVJREFUeNqkk89LFGEYx7/vzLrq7raullAEykYUGVJR7HqRjSiSpIMeuniyWx0XvGT9BUL3TkURHQJFSDCKDTskGCFhYf5qXVfQcG3NVtfZd955+85oWyR2qIHheWbm/X7e7/M+zwitNf7nEn8C7FcixFdJ7aDNcRBzPzMfYz7MeHffVV3YE0Bxgovui4aeqIjEoP11VCvoYg4qPw5rpi9NUHdthx7ZBbBTokNHEv3GwS7oQA3U5ic41iq0vcVVVTCqjkJIA9bCU1hfRjrrr+mBMkCmRB1Dxmx+EnKMdaitNCC/w7EtGtjahjgaorIBZmUzvo1eL9BJ46Eu/dVwKbSdNBp7Q9rPdRbF9gZSgzk+lABVwusXTGUB9voElDWHwInekKOQdLUegLR2URuHw521XSRQ4vyVai9qJdGaKDCnGxey9g6+yDEQ0P47oAmVYVpeK4u8aJeYl3Zy6eWORWewoBSiZQBLKHp1KhsvB95gfnpxW6jkTrSQmVtF6vmiBwMPjJuavxwopCGLzPxoSRzHzMcssnPLntAVLKTzmJ3KIxYPQxgBrvdBWph1tb6dEoZkbvy0EWlAILiEltYoJt7OY/J9lq4kgtUCsVgNggETvvApFHMZyBKGym3M94t6Qj6H4o9CjpyCKkyyATwPttGxf7YRMIOchfBlZJ7dKPBIjpy5pVe8Emo79Yo7YZuzD1nfYZjhOIRZR/va67/hP4CK+oscsAtY/TDo7t7tineN8tJjcYnncS/Y1BP17z8Jo4J8x4ZtKWwsTWN5tC/NnW+eva2H9/yZsg+EOyQ9dNTGVsXYSVA0xnuYd9+5O3/5mf7l+iHAAP3UjrcWL0PwAAAAAElFTkSuQmCC);padding:0 0 0 20px;margin:5px 0}div.comment{margin:10px 0 0;font-size:13px;line-height:16px;color:#666}div.comment > div.body{margin:0 0 0 20px}span.from,span.liker{text-decoration:underline; margin-right: 5px;}div.quote{background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAGXRFWHRzb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAXRJREFUeNpi/P//PwMlgImBQsACY0R27BQCUpOBOASI2fDo2Q3EWcsr3O+AeSAvgHBE+46NKw/e+v/7z9//+MCWk/f/A9XeBmI2kD4WqO08QMo7xFaF4fMPBoaff/4z/AUGzb9/DGD6L4j+95+BEaTITIHh+PUXKneffzQAck/BvCAkwsfBzMTIyPDj93+Gf/+RNIIN+s/w5x/EQBDg4wL7UBQlDGAApvEfks0IV+COhXdvPv0AqgX6iQmi8A/YVojNMAwMMbDiLz9+g/XADQCG6BcgtXXN4TsMIsDQUBBmZFAWYWJQE2Ni4GFnZPj7l4FBko+RQU+amWHrqQcMt59+AMXABZBeRlhCQorGcCBmBom5GsoyJLlrgeU/fPnJsO7oXYbd5x9fAXIDMaIRGwZG1Z3n777+n7vjKijqvgNxLboaRlxJGeiiXkd9maL9F598BXJrgTb2Y1OH1QCgZiUgtQeI24Aa5+BLyowDnpkoNgAgwACtnv0bwuZFOAAAAABJRU5ErkJggg==);display:inline;height:16px;width:16px;margin:2px 0 0;float:left}.footer {overflow: auto;}
</style>
</head>
<body>
<div class="feed">%(entries)s<div class="footer">%(footer)s</div></div>
</body>
</html>"""

ENTRY_T = u"""<div class="entry"><div class="body">%(body)s</div><div class="media">%(thumbnails)s</div><div class="attachments">%(attachments)s</div><div class="info">%(date)s</div><div class="likes">%(likes)s</div><div class="comments">%(comments)s</div></div>"""
THUMBNAIL_T = u"""<a href="%(t_link)s"><img src="%(t_url)s" /></a>"""
ATTACHMENT_T = u"""<p><a href="%(a_link)s">%(a_name)s</a></p>"""
LIKE_T = u"""<span class="liker">%(name)s</span>"""
COMMENT_T = u"""<div class="comment"><div class="quote" title="%(date)s"></div><div class="body">%(body)s - <span class="from">%(from_name)s</span></div></div>"""
PREV_PAGE_T = u"""<a style="float: left;" href="%(prev_page_path)s">Previous Entries</a>"""
NEXT_PAGE_T = u"""<a style="float: right;" href="%(next_page_path)s">Next Entries</a>"""


def download(remote_file_path, local_file_path):
    # Download a remote file to the given local path, skipping files that
    # already exist from a previous run.
    if not exists(local_file_path):
        try:
            open(local_file_path, 'w').write(
                urlopen(remote_file_path).read())
        except URLError:
            logging.error("Couldn't download, skipping: %s" %
                          remote_file_path)
        logging.debug("%s downloaded..." % local_file_path)
    else:
        logging.debug("%s skipping..." % local_file_path)


def slugify(string):
    string = re.sub(r'\s+', '_', string)
    string = re.sub(r'[^\w.-]', '', string)
    return string.strip('_.- ').lower()


class FriendFeedSource(object):

    FEED_DATA_PATTERN = "http://friendfeed-api.com/v2/feed/%s?%s"
    MAX_FETCH_SIZE = 150
    WAIT_BETWEEN_BITES = 5  # seconds

    def __init__(self, feed_id, data_file_path, fetch_limit=0):
        self.feed_id = feed_id
        self.fetch_limit = fetch_limit
        self.data_file_path = data_file_path
        if exists(data_file_path):
            # Resume from a previous run: load what was already fetched.
            with open(data_file_path, 'r') as data_file:
                self.feed_buffer = load(data_file)
            self.cursor_at = len(self.feed_buffer['entries'])
        else:
            self.feed_buffer = None
            self.cursor_at = 0
        logging.info("Initialized for %s, starting from entry %s" %
                     (self.feed_id, self.cursor_at))
        logging.info("Ready to fetch %s entries" % (
            self.fetch_limit or "all"))

    def _read(self):
        chunk = self._take_a_bite()
        if not self.feed_buffer:
            self.feed_buffer = chunk
        else:
            # Resumed run: keep the freshly fetched entries instead of
            # dropping the first chunk.
            self.feed_buffer['entries'].extend(chunk['entries'])
        while chunk['entries']:
            if self.fetch_limit != 0 and self.cursor_at >= self.fetch_limit:
                break
            chunk = self._take_a_bite()
            self.feed_buffer['entries'].extend(chunk['entries'])
            logging.info("%s entries collected." % self.cursor_at)
        return self.feed_buffer

    def read(self):
        # If anything goes wrong mid-fetch, persist what was collected so far
        # so the next run can resume from the same point.
        try:
            return self._read()
        except:
            with open(self.data_file_path, 'w') as data_file:
                dump(self.feed_buffer, data_file)
            return self.feed_buffer

    def _take_a_bite(self):
        # Fetch the next page of the feed, at most MAX_FETCH_SIZE entries.
        if self.fetch_limit:
            if self.fetch_limit - self.cursor_at > self.MAX_FETCH_SIZE:
                fetch_size = self.MAX_FETCH_SIZE
            else:
                fetch_size = self.fetch_limit - self.cursor_at
        else:
            fetch_size = self.MAX_FETCH_SIZE
        params = urlencode({"start": self.cursor_at, "num": fetch_size})
        feed_url = self.FEED_DATA_PATTERN % (self.feed_id, params)
        stream = urlopen(feed_url)
        data = _loads(stream.read())
        stream.close()
        if "errorCode" in data:
            raise ValueError(data['errorCode'])
        self.cursor_at += fetch_size
        return data


def localize(source, data_root):
    """Download thumbnails, images and attachments referenced by the feed
    and rewrite their URLs in the feed data to point at the local copies."""

    def _backup_path(feed_id):
        return join((dirname(__file__)), feed_id)

    def _thumbnails_path():
        return join(data_root, "thumbnails")

    def _images_path():
        return join(data_root, "images")

    def _attachments_path():
        return join(data_root, "attachments")

    def _queue_download(remote_path, local_path):
        downloads.append((remote_path, local_path))

    def _localize_thumbnails(entry):
        for thumbnail in entry['thumbnails']:
            _queue_download(thumbnail['url'],
                            _filename_for_thumbnail(thumbnail))
            thumbnail['url'] = _filename_for_thumbnail(thumbnail)

    def _localize_images(entry):
        for thumbnail in entry['thumbnails']:
            if thumbnail['link'].startswith("http://m.friendfeed-media.com/"):
                _queue_download(thumbnail['link'],
                                _filename_for_image(thumbnail))
                thumbnail['link'] = _filename_for_image(thumbnail)

    def _localize_attachments(entry):
        for attachment in entry['files']:
            _queue_download(attachment['url'],
                            _filename_for_attachment(attachment))
            attachment['url'] = _filename_for_attachment(attachment)

    def _filename(root, url, default_ext=".jpg"):
        filename = slugify(urlparse.urlparse(url).path[1:])
        filename = filename + default_ext if "." not in filename else filename
        return join(root, filename)

    def _filename_for_thumbnail(thumbnail):
        return _filename(_thumbnails_path(), thumbnail['url'])

    def _filename_for_image(thumbnail):
        return _filename(_images_path(), thumbnail['link'])

    def _filename_for_attachment(attachment):
        return _filename(_attachments_path(),
                         attachment['name'], default_ext='')

    feed_data = source.read()
    downloads = []

    if not exists(data_root):
        mkdir(data_root)
    if not exists(_thumbnails_path()):
        mkdir(_thumbnails_path())
    if not exists(_images_path()):
        mkdir(_images_path())
    if not exists(_attachments_path()):
        mkdir(_attachments_path())

    logging.info("Starting localization")
    current = 0
    for entry in feed_data['entries']:
        if "thumbnails" in entry:
            _localize_thumbnails(entry)
            _localize_images(entry)
        if "files" in entry:
            _localize_attachments(entry)
        logging.info("%s entries localized." % current)
        current += 1

    logging.info("Starting downloads...")
    current = 0
    dl_length = len(downloads)
    for dl in downloads:
        logging.info("Downloading %d of %d" % (current, dl_length))
        download(dl[0], dl[1])
        current += 1

    return feed_data


def render(feed_data, data_root, page, filename):
    entries_from = ENTRIES_PER_PAGE * page
    entries_to = min(entries_from + ENTRIES_PER_PAGE,
                     len(feed_data['entries']))
    entries = feed_data['entries'][entries_from:entries_to]

    def relative_path(path):
        return path[len(data_root) + 1:]

    rendered_entries = ""
    rendered_footer = ""
    for entry in entries:
        rendered_comments = ""
        rendered_thumbnails = ""
        rendered_likes = ""
        rendered_attachments = ""
        if 'comments' in entry:
            for comment in entry['comments']:
                rendered_comments += COMMENT_T % {
                    'date': comment['date'],
                    'body': comment['body'],
                    'from_name': comment['from']['name']}
        if 'likes' in entry:
            for like in entry['likes']:
                rendered_likes += LIKE_T % {'name': like['from']['name']}
        if 'thumbnails' in entry:
            for thumbnail in entry['thumbnails']:
                rendered_thumbnails += THUMBNAIL_T % {
                    't_url': relative_path(thumbnail['url']),
                    't_link': relative_path(thumbnail['link'])}
        if 'files' in entry:
            for attachment in entry['files']:
                rendered_attachments += ATTACHMENT_T % {
                    'a_link': relative_path(attachment['url']),
                    'a_name': attachment['name']}
        rendered_entries += ENTRY_T % {
            'body': entry['body'],
            'date': entry['date'],
            'comments': rendered_comments,
            'likes': rendered_likes,
            'thumbnails': rendered_thumbnails,
            'attachments': rendered_attachments}

    next_page = page + 1 if len(feed_data['entries']) > entries_to else None
    prev_page = page - 1 if entries_from > 0 else None
    # prev_page can be 0 (the first page), so compare against None explicitly.
    if prev_page is not None:
        rendered_footer += PREV_PAGE_T % {
            'prev_page_path': relative_path(filename % {'pagenum': prev_page})}
    if next_page is not None:
        rendered_footer += NEXT_PAGE_T % {
            'next_page_path': relative_path(filename % {'pagenum': next_page})}

    with open(filename % {'pagenum': page}, 'w') as htmlfile:
        output = BODY_T % {
            'entries': rendered_entries, 'name': feed_data['name'],
            'footer': rendered_footer}
        htmlfile.write(output.encode('utf-8'))
    logging.info('rendered: %s' % (filename % {'pagenum': page}))
    return next_page


def main():
    parser = OptionParser(
        usage="usage: filika [options] feedid", version="filika 1.0")
    parser.add_option(
        "-l", "--limit", action="store", dest="fetch_limit", default=0,
        type="int", help="Limit number of entries that will be collected")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("Wrong number of arguments")
    feed_id = args[0]

    backup_root = join(getcwd(), feed_id)
    if not exists(backup_root):
        mkdir(backup_root)
    data_root = join(backup_root, "data")

    try:
        source = FriendFeedSource(feed_id, join(backup_root, feed_id + ".fd"),
                                  fetch_limit=options.fetch_limit)
        localized_data = localize(source, data_root)
    except HTTPError:
        logging.error("Invalid username, or the user's feed is private.")
        return

    # Dump localized data
    localized_data_file = open(join(backup_root, feed_id + ".lfd"), "w")
    dump(localized_data, localized_data_file)
    localized_data_file.close()

    # Render paginated HTML until render() reports there is no next page.
    pagename = "index_%(pagenum)s.html"
    next_page = render(localized_data, backup_root, 0,
                       join(backup_root, pagename))
    while next_page:
        next_page = render(localized_data, backup_root, next_page,
                           join(backup_root, pagename))

    print "---------------------------------------------------------------\n\n"
    print "Backup for %s created. Check this folder:\n\n%s \n\n" % (
        feed_id, backup_root)
    print 'If you want to contact me, use Twitter:\n\n' \
          'http://twitter.com/mirat\n\n'


if __name__ == '__main__':
    main()
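For reference, here is a rough sketch of the on-disk layout a run for a feed id such as bret produces, based on the paths built in main() and localize() (illustrative, not captured from a real run):

bret/
    bret.fd          partially fetched feed data; only written if fetching fails, so a later run can resume
    bret.lfd         feed data with media URLs rewritten to point at the local copies
    index_0.html     rendered pages, ENTRIES_PER_PAGE (100) entries per page
    index_1.html
    ...
    data/
        thumbnails/
        images/
        attachments/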
The name filika is really good. 👍