Created
December 16, 2010 21:31
-
-
Save robballou/744051 to your computer and use it in GitHub Desktop.
Import delicious bookmark XML to mongodb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Python script to copy a delicious XML file into a mongodb. | |
Essentially saves all attributes from the XML as-is, but I do make the following changes: | |
- Added a "tags" attribute to the saved document that separates the tags into a list | |
- Converted the "time" attribute to a datetime | |
Assumes both the database and the collection are named "links". | |
Usage: | |
python import_to_mongo.py | |
Requires: | |
- pymongo: http://api.mongodb.org/python/ | |
- beautifulsoup: http://www.crummy.com/software/BeautifulSoup/ | |
""" | |
import datetime | |
from BeautifulSoup import BeautifulSoup | |
import pymongo | |
# Add your mongodb info here. I used a full mongodb:// uri | |
# http://www.mongodb.org/display/DOCS/Connections | |
HOST = 'mongodb://user:pass@host/db' | |
PORT = 27076 | |
# Add the local file path to your bookmarks file | |
BOOKMARKS = '/Users/myuser/bookmarks.xml' | |
def post_to_document(attrs):
    """Build the MongoDB document for one delicious ``<post>`` element.

    Args:
        attrs: iterable of ``(name, value)`` pairs, such as the ``attrs``
            list BeautifulSoup 3 exposes on a tag.

    Returns:
        A dict holding every attribute as-is, plus:
        - ``tags``: the whitespace-separated ``tag`` attribute split into a
          list (empty list when ``tag`` is empty or absent);
        - ``time``: parsed into a ``datetime.datetime`` when present.

    Raises:
        ValueError: if ``time`` is present but does not match
            ``%Y-%m-%dT%H:%M:%SZ``.
    """
    doc = dict(attrs)
    # split() with no argument already returns a list and yields [] for an
    # empty string, so no comprehension or special-casing is needed.
    doc['tags'] = doc.get('tag', '').split()
    if 'time' in doc:
        doc['time'] = datetime.datetime.strptime(
            doc['time'], '%Y-%m-%dT%H:%M:%SZ')
    return doc


if __name__ == '__main__':
    # Read the export first and close the file promptly; a missing file then
    # fails before any database connection is opened.
    with open(BOOKMARKS) as bookmarks_file:
        soup = BeautifulSoup(bookmarks_file.read())

    conn = pymongo.Connection(HOST, PORT)
    try:
        # Database "links", collection "links".
        db = conn.links
        for link in soup.findAll('post'):
            db.links.insert(post_to_document(link.attrs))
    finally:
        # Always release the connection, even if an insert or parse fails.
        conn.disconnect()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment