Created May 30, 2012 09:43
Save peralta/2835160 to your computer and use it in GitHub Desktop.
A quick-and-dirty script to migrate Trac tickets to a static site, based on their RSS feeds.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
dirty and filthy script to migrate trac tickets to a static site, based on | |
their rss feed. | |
will take all xml files in data/ dir and create the new files with .html | |
extension. also accepts single xml file as input. | |
depends on feedparser (pip install feedparser) | |
to retrieve the tickets for a trac install, just do: | |
let i=1 | |
while [ $i -lt 45000 ] | |
do | |
curl -s "https://trac.example.com/projects/exampleproject/ticket/$i?format=rss" > data/t$i.xml | |
i=$(($i + 1)) | |
done | |
""" | |
import feedparser | |
import glob | |
import re | |
import sys | |
# Full-page HTML skeleton for one generated ticket page.  Filled via
# old-style %-formatting with a dict that must provide the keys
# 'title', 'subtitle' and 'entries_html' (generate_html() passes
# feed.feed augmented with those keys).
body_template = """<!DOCTYPE html>
<html lang="en">
<head>
<title>%(title)s</title>
<link href="http://twitter.github.com/bootstrap/assets/css/bootstrap.css" rel="stylesheet">
</head>
<body>
<div class="navbar navbar-fixed-top">
<div class="navbar-inner">
<div class="container">
<a class="brand" href="#">Tuenti.com ancient trac archive</a>
</div>
</div>
</div>
<div class="container">
<h1>%(title)s</h1>
<div class="hero-unit">
%(subtitle)s
</div>
<br />
%(entries_html)s
</div>
</body>
"""
# Fragment rendered once per RSS entry (one ticket change/comment).
# Expects the keys 'published', 'author', 'title' and 'description',
# which feedparser entries expose via their dict interface.
entry_template = """
<hr />
<h3>%(published)s - %(author)s: %(title)s</h3>
%(description)s
"""
# Pre-compiled patterns for trac ticket and changeset URLs.  Raw strings
# avoid the invalid-escape problem with \d, and the literal dots are
# escaped so e.g. "tracXtuenti" can no longer match by accident.
ticket_re = re.compile(r'(https://trac\.tuenti\.com/projects/tuenti\.com/ticket/)(\d+)')
changeset_re = re.compile(r'(https://trac\.tuenti\.com/projects/tuenti\.com/changeset/)([a-f0-9]+)([^"]+)')

def replace_links(text):
    """Rewrite trac URLs in *text* to their post-migration targets.

    Ticket links become local static pages ("t<ID>.html"); changeset
    links point at the fisheye changelog for the same revision.
    Returns the rewritten string; *text* itself is not modified.
    """
    def link_ticket(match):
        # Only the numeric id survives: .../ticket/123 -> t123.html
        return 't' + match.group(2) + '.html'
    def link_changeset(match):
        # group(3) (any trailing path/query up to a quote) is dropped.
        return 'http://fisheye.tuenti.int/changelog/release?cs=' + match.group(2)
    # pattern.sub() instead of re.sub(pattern, ...): same behavior,
    # clearer that the patterns are already compiled.
    text = ticket_re.sub(link_ticket, text)
    return changeset_re.sub(link_changeset, text)
def generate_html(feed):
    """Render a feedparser-parsed trac RSS feed into one HTML page.

    Rewrites trac links in every entry description and in the feed
    subtitle, then fills body_template with feed.feed.

    NOTE: mutates *feed* in place -- each entry's 'description' is
    rewritten and feed.feed gains an 'entries_html' key; existing
    callers only use the return value.
    """
    # Collect fragments and join once instead of quadratic "+="
    # string concatenation.
    fragments = []
    for entry in feed.entries:
        entry['description'] = replace_links(entry['description'])
        fragments.append(entry_template % entry)
    feed.feed['entries_html'] = "".join(fragments)
    feed.feed['subtitle'] = replace_links(feed.feed['subtitle'])
    return body_template % feed.feed
if __name__ == '__main__': | |
fnames = glob.glob("data/*xml") | |
if len(sys.argv) > 1: | |
fnames = sys.argv[1:] | |
for fname in fnames: | |
try: | |
feed = feedparser.parse(file(fname, "r").read()) | |
print "generating feed for", fname | |
html = generate_html(feed) | |
fd_name = fname.replace("xml", "html") | |
fd = file(fd_name, "w") | |
fd.write(html.encode('iso-8859-1')) | |
fd.close() | |
except: | |
print "problem generating html for", fname | |
#break |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment