Created
June 16, 2013 16:17
-
-
Save anastasop/5792514 to your computer and use it in GitHub Desktop.
A simple Python script that unifies bookmark files exported from Chrome and Firefox. Useful for old backups, but pretty useless in the cloud age. It uses BeautifulSoup (http://www.crummy.com/software/BeautifulSoup/) for HTML parsing and Jinja2 (http://jinja.pocoo.org/docs/) for templating. Tested with Python 2.7.3 on Ubuntu 12.04 LTS (64-bit). The c…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# create the union of a set of mozilla bookmarks files
# usage: ./beq.py bookmarks-20110501.html bookmarks-20120101.html > bookmarks.html
#
# Every <a href="..."> found in the input files is collected into a single
# mapping keyed by URL, so a URL that appears in several files is emitted
# once (the title from the last file that mentions it wins). The result is
# rendered as a minimal HTML page and written to stdout as UTF-8.
#
# license: the code is in the public domain
import io
import sys

from bs4 import BeautifulSoup
from jinja2 import Template

if not sys.argv[1:]:
    # stdout is normally redirected into the output file, so the usage text
    # goes to stderr and the exit status signals that nothing was produced.
    sys.stderr.write("usage: ./beq.py bookmarks-20110501.html bookmarks-20120101.html > bookmarks.html\n")
    sys.exit(1)

# Maps bookmark URL -> link title.
bookmarks = {}
for fname in sys.argv[1:]:
    # io.open decodes to text on both Python 2 and Python 3, and the
    # context manager closes the file as soon as it has been read.
    with io.open(fname, 'r', encoding='utf-8') as infile:
        text = infile.read()
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(text, 'html.parser')
    # href=True skips anchors that carry no href attribute, which would
    # otherwise raise KeyError below.
    for link in soup.find_all('a', href=True):
        href = link['href']
        title = link.string
        if title is None:
            # .string is None when the anchor has nested markup or no
            # children; fall back to the concatenated text content.
            title = link.get_text()
        # An anchor without any text would render as an invisible link,
        # so show the URL itself in that case.
        bookmarks[href] = title or href

template = Template(u"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>Bookmarks</title>
</head>
<body>
<h1>Bookmarks</h1>
{% for key, value in hrefs.items() %}
<a href="{{key | escape}}">{{value | escape}}</a><br>
{% endfor %}
</body>
</html>
""")
s = template.render({'hrefs': bookmarks})

# Emit raw UTF-8 bytes: Python 3 exposes the byte stream as sys.stdout.buffer,
# while on Python 2 sys.stdout itself accepts byte strings.
byte_stream = getattr(sys.stdout, 'buffer', sys.stdout)
byte_stream.write(s.encode('utf-8'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment