@miratcan
Last active February 29, 2016 06:39
FriendFeed backup tool without any package dependencies. Just download and run it like: ./filika bret
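A minimal usage sketch (assumes a Python 2 interpreter, since the script uses urllib2 and print statements; "bret" is just the example feed id from the description, and -l/--limit caps how many entries are fetched):

    chmod +x filika
    ./filika bret          # back up the whole feed
    ./filika -l 300 bret   # fetch at most 300 entries

The backup lands in a ./<feedid>/ folder in the current directory as index_0.html, index_1.html and so on, with downloaded media under ./<feedid>/data/.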
#!/usr/bin/env python
__author__ = "Mirat Can Bayrak"
__email__ = "[email protected]"
__copyright__ = "Copyright 2015, Planet Earth"
ENTRIES_PER_PAGE = 100
import re
import logging
from optparse import OptionParser
from urllib2 import urlparse, urlopen, URLError, HTTPError
from os.path import join, dirname, exists
from os import mkdir, getcwd
from json import load, loads, dump
from urllib import urlencode
_loads = lambda string: loads(string.decode("utf-8"))
logging.basicConfig(format='%(levelname)s: %(message)s',
level=logging.INFO)
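# HTML templates used to render the backup pages (old-style "%" string formatting).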
BODY_T = u"""
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>Backup of %(name)s</title>
<meta http-equiv="content-type" content="text/html;charset=utf-8" />
<style>
body{font-family:Arial,sans-serif;font-size:15px;background: #ddd;}div.feed{background:#fff; width:800px;border:1px solid #ccc;margin:0 auto;padding:1em}div.entry{margin:0 0 2em 0}div.body{margin-bottom: 1em;}div.info{font-size:.7em}div.likes{background:top left no-repeat url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAnVJREFUeNqkk89LFGEYx7/vzLrq7raullAEykYUGVJR7HqRjSiSpIMeuniyWx0XvGT9BUL3TkURHQJFSDCKDTskGCFhYf5qXVfQcG3NVtfZd955+85oWyR2qIHheWbm/X7e7/M+zwitNf7nEn8C7FcixFdJ7aDNcRBzPzMfYz7MeHffVV3YE0Bxgovui4aeqIjEoP11VCvoYg4qPw5rpi9NUHdthx7ZBbBTokNHEv3GwS7oQA3U5ic41iq0vcVVVTCqjkJIA9bCU1hfRjrrr+mBMkCmRB1Dxmx+EnKMdaitNCC/w7EtGtjahjgaorIBZmUzvo1eL9BJ46Eu/dVwKbSdNBp7Q9rPdRbF9gZSgzk+lABVwusXTGUB9voElDWHwInekKOQdLUegLR2URuHw521XSRQ4vyVai9qJdGaKDCnGxey9g6+yDEQ0P47oAmVYVpeK4u8aJeYl3Zy6eWORWewoBSiZQBLKHp1KhsvB95gfnpxW6jkTrSQmVtF6vmiBwMPjJuavxwopCGLzPxoSRzHzMcssnPLntAVLKTzmJ3KIxYPQxgBrvdBWph1tb6dEoZkbvy0EWlAILiEltYoJt7OY/J9lq4kgtUCsVgNggETvvApFHMZyBKGym3M94t6Qj6H4o9CjpyCKkyyATwPttGxf7YRMIOchfBlZJ7dKPBIjpy5pVe8Emo79Yo7YZuzD1nfYZjhOIRZR/va67/hP4CK+oscsAtY/TDo7t7tineN8tJjcYnncS/Y1BP17z8Jo4J8x4ZtKWwsTWN5tC/NnW+eva2H9/yZsg+EOyQ9dNTGVsXYSVA0xnuYd9+5O3/5mf7l+iHAAP3UjrcWL0PwAAAAAElFTkSuQmCC);padding:0 0 0 20px;margin:5px 0}div.comment{margin:10px 0 0;font-size:13px;line-height:16px;color:#666}div.comment > div.body{margin:0 0 0 20px}span.from,span.liker{text-decoration:underline; margin-right: 5px;}div.quote{background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAXRJREFUeNpi/P//PwMlgImBQsACY0R27BQCUpOBOASI2fDo2Q3EWcsr3O+AeSAvgHBE+46NKw/e+v/7z9//+MCWk/f/A9XeBmI2kD4WqO08QMo7xFaF4fMPBoaff/4z/AUGzb9/DGD6L4j+95+BEaTITIHh+PUXKneffzQAck/BvCAkwsfBzMTIyPDj93+Gf/+RNIIN+s/w5x/EQBDg4wL7UBQlDGAApvEfks0IV+COhXdvPv0AqgX6iQmi8A/YVojNMAwMMbDiLz9+g/XADQCG6BcgtXXN4TsMIsDQUBBmZFAWYWJQE2Ni4GFnZPj7l4FBko+RQU+amWHrqQcMt59+AMXABZBeRlhCQorGcCBmBom5GsoyJLlrgeU/fPnJsO7oXYbd5x9fAXIDMaIRGwZG1Z3n777+n7vjKijqvgNxLboaRlxJGeiiXkd9maL9F598BXJrgTb2Y1OH1QCgZiUgtQeI24Aa5+BLyowDnpkoNgAgwACtnv0bwuZFOAAAAABJRU5ErkJggg==);display:inline;height:16px;width:16px;margin:2px 0 0;float:left}.footer {overflow: auto;}
</style>
</head>
<body>
<div class="feed">%(entries)s<div class="footer">%(footer)s</div></div>
</body>
</html>"""
ENTRY_T = u"""<div class="entry"><div class="body">%(body)s</div><div class="media">%(thumbnails)s</div><div class="attachments">%(attachments)s</div><div class="info">%(date)s</div><div class="likes">%(likes)s</div><div class="comments">%(comments)s</div></div>"""
THUMBNAIL_T = u"""<a href="%(t_link)s"><img src="%(t_url)s" /></a>"""
ATTACHMENT_T = u"""<p><a href="%(a_link)s">%(a_name)s</a></p>"""
LIKE_T = u"""<span class="liker">%(name)s</span>"""
COMMENT_T = u"""<div class="comment"><div class="quote" title="%(date)s"></div><div class="body">%(body)s - <span class="from">%(from_name)s</span></div></div>"""
PREV_PAGE_T = u"""<a style="float: left;" href="%(prev_page_path)s">Previous Entries</a>"""
NEXT_PAGE_T = u"""<a style="float: right;" href="%(next_page_path)s">Next Entries</a>"""
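# Download remote_file_path to local_file_path unless the local copy already exists.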
def download(remote_file_path, local_file_path):
if not exists(local_file_path):
try:
            with open(local_file_path, 'wb') as local_file:
                local_file.write(urlopen(remote_file_path).read())
        except URLError:
            logging.error("Couldn't download, skipping: %s" %
                          remote_file_path)
logging.debug("%s downloaded..." % local_file_path)
else:
logging.debug("%s skipping..." % local_file_path)
def slugify(string):
    string = re.sub(r'\s+', '_', string)
    string = re.sub(r'[^\w.-]', '', string)
return string.strip('_.- ').lower()
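# Pages through the public FriendFeed v2 API for a single feed, accumulating all
# entries in one in-memory buffer that can be persisted to disk and resumed from.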
class FriendFeedSource(object):
FEED_DATA_PATTERN = "http://friendfeed-api.com/v2/feed/%s?%s"
MAX_FETCH_SIZE = 150
WAIT_BETWEEN_BITES = 5 # seconds
def __init__(self, feed_id, data_file_path, fetch_limit=0):
self.feed_id = feed_id
self.fetch_limit = fetch_limit
self.data_file_path = data_file_path
if exists(data_file_path):
with open(data_file_path, 'r') as data_file:
self.feed_buffer = load(data_file)
self.cursor_at = len(self.feed_buffer['entries'])
else:
self.feed_buffer = None
self.cursor_at = 0
logging.info("Initialized for %s, starting from %sth entry" %
(self.feed_id, self.cursor_at))
logging.info("Ready for fetching %s entries" % (
self.fetch_limit or "all"))
def _read(self):
chunk = self._take_a_bite()
        if not self.feed_buffer:
            self.feed_buffer = chunk
        else:
            self.feed_buffer['entries'].extend(chunk['entries'])
while chunk['entries']:
if self.fetch_limit != 0 and self.cursor_at >= self.fetch_limit:
break
chunk = self._take_a_bite()
self.feed_buffer['entries'].extend(chunk['entries'])
logging.info("%s entries collected." % self.cursor_at)
return self.feed_buffer
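    # read() wraps _read() so that any failure (or a Ctrl-C) dumps whatever has been
    # fetched so far to data_file_path; __init__ picks that file up on the next run.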
def read(self):
try:
return self._read()
except:
with open(self.data_file_path, 'w') as data_file:
dump(self.feed_buffer, data_file)
return self.feed_buffer
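    # Fetch the next batch of up to MAX_FETCH_SIZE entries, starting at cursor_at.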
def _take_a_bite(self):
if self.fetch_limit:
if self.fetch_limit - self.cursor_at > self.MAX_FETCH_SIZE:
fetch_size = self.MAX_FETCH_SIZE
else:
fetch_size = self.fetch_limit - self.cursor_at
else:
fetch_size = self.MAX_FETCH_SIZE
params = urlencode({"start": self.cursor_at, "num": fetch_size})
feed_url = self.FEED_DATA_PATTERN % (self.feed_id, params)
stream = urlopen(feed_url)
data = _loads(stream.read())
stream.close()
if "errorCode" in data:
raise ValueError(data['errorCode'])
else:
            self.cursor_at += len(data['entries'])
return data
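# Download every thumbnail, full-size image and attachment referenced by the feed,
# and rewrite the entry data so it points at the local copies under data_root.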
def localize(source, data_root):
def _backup_path(feed_id):
return join((dirname(__file__)), feed_id)
def _thumbnails_path():
return join(data_root, "thumbnails")
def _images_path():
return join(data_root, "images")
def _attachments_path():
return join(data_root, "attachments")
def _queue_download(remote_path, local_path):
downloads.append((remote_path, local_path))
def _localize_thumbnails(entry):
for thumbnail in entry['thumbnails']:
_queue_download(thumbnail['url'],
_filename_for_thumbnail(thumbnail))
thumbnail['url'] = _filename_for_thumbnail(thumbnail)
def _localize_images(entry):
for thumbnail in entry['thumbnails']:
if thumbnail['link'].startswith("http://m.friendfeed-media.com/"):
_queue_download(thumbnail['link'],
_filename_for_image(thumbnail))
thumbnail['link'] = _filename_for_image(thumbnail)
def _localize_attachments(entry):
for attachment in entry['files']:
_queue_download(attachment['url'],
_filename_for_attachment(attachment))
attachment['url'] = _filename_for_attachment(attachment)
def _filename(root, url, default_ext=".jpg"):
filename = slugify(urlparse.urlparse(url).path[1:])
filename = filename + default_ext if "." not in filename else filename
return join(root, filename)
def _filename_for_thumbnail(thumbnail):
return _filename(_thumbnails_path(), thumbnail['url'])
def _filename_for_image(thumbnail):
return _filename(_images_path(), thumbnail['link'])
def _filename_for_attachment(attachment):
return _filename(_attachments_path(),
attachment['name'], default_ext='')
feed_data = source.read()
downloads = []
if not exists(data_root):
mkdir(data_root)
if not exists(_thumbnails_path()):
mkdir(_thumbnails_path())
if not exists(_images_path()):
mkdir(_images_path())
if not exists(_attachments_path()):
mkdir(_attachments_path())
logging.info("Starting localization")
current = 0
for entry in feed_data['entries']:
if "thumbnails" in entry:
_localize_thumbnails(entry)
_localize_images(entry)
if "files" in entry:
_localize_attachments(entry)
logging.info("%s entries localized." % str(current))
current += 1
logging.info("Starting downloads...")
current = 0
dl_length = len(downloads)
for dl in downloads:
logging.info("Downloading %d of %d" % (current, dl_length))
download(dl[0], dl[1])
current += 1
return feed_data
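# Render one page of ENTRIES_PER_PAGE entries to an HTML file and return the next
# page number, or None when there are no more entries.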
def render(feed_data, data_root, page, filename):
entries_from = ENTRIES_PER_PAGE * page
entries_to = min(entries_from + ENTRIES_PER_PAGE,
len(feed_data['entries']))
entries = feed_data['entries'][entries_from:entries_to]
def relative_path(path):
return path[len(data_root) + 1:]
rendered_entries = ""
for entry in entries:
rendered_comments = ""
rendered_thumbnails = ""
rendered_likes = ""
rendered_footer = ""
rendered_attachments = ""
if 'comments' in entry:
for comment in entry['comments']:
rendered_comments += COMMENT_T % {
'date': comment['date'],
'body': comment['body'],
'from_name': comment['from']['name']}
if 'likes' in entry:
for like in entry['likes']:
rendered_likes += LIKE_T % {'name': like['from']['name']}
if 'thumbnails' in entry:
for thumbnail in entry['thumbnails']:
rendered_thumbnails += THUMBNAIL_T % {
't_url': relative_path(thumbnail['url']),
't_link': relative_path(thumbnail['link'])}
if 'files' in entry:
for attachment in entry['files']:
rendered_attachments += ATTACHMENT_T % {
'a_link': relative_path(attachment['url']),
'a_name': attachment['name']}
rendered_entries += ENTRY_T % {
'body': entry['body'],
'date': entry['date'],
'comments': rendered_comments,
'likes': rendered_likes,
'thumbnails': rendered_thumbnails,
'attachments': rendered_attachments}
next_page = page + 1 if len(feed_data['entries']) \
> entries_to else None
prev_page = page - 1 if entries_from > 0 else None
    if prev_page is not None:
rendered_footer += PREV_PAGE_T % {
'prev_page_path': relative_path(filename % {'pagenum': prev_page})}
if next_page:
rendered_footer += NEXT_PAGE_T % {
'next_page_path': relative_path(filename % {'pagenum': next_page})}
with open(filename % {'pagenum': page}, 'w') as htmlfile:
output = BODY_T % {
'entries': rendered_entries, 'name': feed_data['name'],
'footer': rendered_footer}
htmlfile.write(output.encode('utf-8'))
    logging.info('rendered: %s' % (filename % {'pagenum': page}))
return next_page
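# Parse arguments, fetch and localize the feed, then render it page by page.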
def main():
parser = OptionParser(
usage="usage: filika [options] feedid", version="filika 1.0")
parser.add_option(
"-l", "--limit", action="store", dest="fetch_limit", default=0,
type="int", help="Limit number of entries that will be collected",)
(options, args) = parser.parse_args()
if len(args) != 1:
parser.error("Wrong number of arguments")
feed_id = args[0]
backup_root = join(getcwd(), feed_id)
if not exists(backup_root):
mkdir(backup_root)
data_root = join(backup_root, "data")
try:
source = FriendFeedSource(feed_id, join(backup_root, feed_id + ".fd"),
fetch_limit=options.fetch_limit)
localized_data = localize(source, data_root)
except HTTPError:
logging.error("Invalid username or user is private feed.")
return
# Dump localized data
localized_data_file = open(join(backup_root, feed_id + ".lfd"), "w")
dump(localized_data, localized_data_file)
localized_data_file.close()
pagename = "index_%(pagenum)s.html"
next_page = render(localized_data, backup_root, 0,
join(backup_root, pagename))
while next_page:
next_page = render(localized_data, backup_root, next_page,
join(backup_root, pagename))
print "---------------------------------------------------------------\n\n"
print "Backup for %s created. Check this folder:\n\n%s \n\n" % (
feed_id, backup_root)
    print 'If you want to contact me, use Twitter:\n\n'\
          'http://twitter.com/mirat\n\n'
if __name__ == '__main__':
main()
@UgurAldanmaz

The name "filika" is really good. 👍
