Skip to content

Instantly share code, notes, and snippets.

@btbytes
Created June 3, 2009 14:07
Show Gist options
  • Save btbytes/123004 to your computer and use it in GitHub Desktop.
Save btbytes/123004 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from __future__ import with_statement
import os
import sys
import string
from string import Template
from config import *
from datetime import date
from textile import textile
from stat import *
import datetime
import PyRSS2Gen
# Site-wide RSS feed object. It starts with no items; add_rss_item() appends
# to rss.items and main() writes the result out as XML.
rss = PyRSS2Gen.RSS2(
    title=options["sitename"],
    link=options["siteurl"],
    description=options["slogan"],
    # PyRSS2Gen renders datetimes with a "GMT" label and does no timezone
    # conversion itself, so the build time has to be supplied in UTC rather
    # than local time.
    lastBuildDate=datetime.datetime.utcnow(),
    items=[])
def add_rss_item(rss, title, link, description, pub_date=None):
    """Append one entry to the feed object *rss*.

    rss         -- feed object whose ``items`` list receives the new entry
    title       -- entry title
    link        -- entry URL; also used as the entry's GUID
    description -- entry body
    pub_date    -- optional datetime for the entry's publication date.
                   When omitted, the fixed placeholder date that this
                   function has always used is kept, so existing callers
                   produce exactly the same feed as before.
    """
    if pub_date is None:
        # NOTE(review): legacy placeholder, identical for every entry.
        # Callers that know the real publication date should pass pub_date.
        pub_date = datetime.datetime(2003, 9, 6, 21, 31)
    item = PyRSS2Gen.RSSItem(
        title=title,
        link=link,
        description=description,
        guid=PyRSS2Gen.Guid(link),
        pubDate=pub_date)
    rss.items.append(item)
def ext(fname):
    """Return the extension of *fname*, leading dot included ('' if none)."""
    _stem, suffix = os.path.splitext(fname)
    return suffix
def process(fname):
    """Check that *fname* has a header section and a body.

    A page is a header section, a blank line, then the body. Only the first
    blank line is treated as the separator (the same rule parse() applies),
    so a body made of several paragraphs is still accepted.

    Prints a message when the separator is missing. Returns None.
    """
    with open(fname, 'r') as f:
        raw = f.read()
    try:
        # Unpacking fails with ValueError when there is no blank line at all.
        head, body = raw.split('\n\n', 1)
    except ValueError:
        # Same output as the statement form: print 'Invalid file format : ', fname
        print('Invalid file format :  %s' % fname)
def parse(fname):
    """Read a page file and split it into headers and body.

    The file holds "Name: value" header lines, one blank line, then the body.
    Header names are lower-cased and values are stripped and stored as text.

    Returns a (headers, body) tuple, where headers is a dict.
    Raises TypeError when the file has no blank-line separator or a header
    line is not of the form "Name: value".
    """
    try:
        to_text = unicode  # Python 2
    except NameError:
        to_text = str  # Python 3: str is already the text type

    with open(fname, 'r') as f:
        raw = f.read()

    headers = {}
    try:
        header_lines, body = raw.split("\n\n", 1)
        for header in header_lines.split("\n"):
            name, value = header.split(": ", 1)
            headers[name.lower()] = to_text(value.strip())
    except ValueError:
        # ValueError covers both failed unpacking of a split() result and a
        # header value that cannot be decoded to text. Unrelated errors are
        # no longer disguised as a format problem.
        raise TypeError("Invalid page file format for %s" % fname)
    return headers, body
def get_template(template):
    """Load a template by file name and return it as a string.Template.

    template -- file name of the template, looked up inside the module-level
                ``template_dir``.
    """
    path = os.path.join(template_dir, template)
    # Close the file as soon as it has been read instead of leaving the
    # handle for the garbage collector.
    with open(path, 'r') as handle:
        return Template(handle.read())
def source_newer(source, target):
    """Tell whether *target* needs rebuilding from *source*.

    Returns True when *target* does not exist, otherwise True only if the
    modification time of *source* is strictly later than that of *target*.
    """
    if not os.path.exists(target):
        return True
    return os.stat(source)[ST_MTIME] > os.stat(target)[ST_MTIME]
def is_blog(current_dir, myself, headers, files):
    """Return the function that produces the content of a page.

    For a page whose content-type header is "text/blog", the page body is
    replaced by a listing of the other files in its directory, sorted in
    reverse order of file name. Each listed file is also added to the
    module-level RSS feed. For any other page the returned function hands
    the body back unchanged.

    current_dir -- directory containing the page and its sibling files
    myself      -- path of the page itself; its base name is left out of
                   the listing
    headers     -- parsed headers of the page; a blog page must also provide
                   'item-template'
    files       -- file names in current_dir

    Returns a one-argument callable. For a blog page the argument is ignored
    and the concatenated listing is returned.
    """
    if headers.get('content-type') != "text/blog":
        return lambda s: s

    # URL paths always use '/', whereas os.path.join would produce
    # backslashes on Windows. On POSIX the two are the same function.
    import posixpath

    entries = list(files)
    entries.remove(os.path.split(myself)[-1])
    entries.sort(reverse=True)

    section = os.path.split(current_dir)[-1]
    item_template = None  # loaded once, on the first entry that needs it
    listing = []
    for entry in entries:
        print("Doing blog %s" % entry)
        # load up the file and peel out the first few paragraphs
        head, body = parse(os.path.join(current_dir, entry))
        paras = [p for p in body.split("\n\n") if p]
        if not paras:
            continue
        # now make a simple listing entry with it; the file name without its
        # extension doubles as the entry's date
        stem = os.path.splitext(entry)[0]
        head["link"] = posixpath.join(section, stem + ".html")
        head["date"] = stem
        entry_format = determine_format(head)
        head["content"] = content_format(current_dir, entry, head, files,
                                         entry_format, "\n".join(paras[0:2]))
        if item_template is None:
            item_template = get_template(headers['item-template'])
        description = item_template.safe_substitute(head)
        add_rss_item(rss, head["title"], options["siteurl"] + head["link"],
                     description)
        listing.append(description)
    return lambda s: "".join(listing)
def content_format(current_dir, inp, headers, files, format, body):
    """Render *body* according to the content type named by *format*.

    Supported types are text/plain (wrapped in <pre>), text/x-textile
    (converted with textile), text/html (passed through) and text/blog
    (handled by is_blog). Any other value of *format* raises KeyError.
    """
    def preformatted(text):
        return u'<pre>%s</pre>' % text

    def from_textile(text):
        return u'%s' % textile(text, head_offset=0, validate=0,
                               sanitize=0, encoding='utf-8', output='utf-8')

    def unchanged(text):
        return text

    renderers = {
        u'text/plain': preformatted,
        u'text/x-textile': from_textile,
        u'text/html': unchanged,
        # is_blog() is called while the table is built, i.e. on every call
        # and before the lookup, whatever *format* is.
        u'text/blog': is_blog(current_dir, inp, headers, files),
    }
    render = renderers[format]
    return render(body)
def determine_format(headers):
    """Return the page's content type.

    The 'content-type' header wins when present; otherwise the site-wide
    default options['format'] is used.
    """
    return headers['content-type'] if 'content-type' in headers else options['format']
def parse_directory(current_dir, files, output_dir):
    """Render every page in one source directory to an .html file.

    current_dir -- source directory being processed
    files       -- names of the files in current_dir; only those whose
                   extension is listed in options['extensions'] are used
    output_dir  -- directory the .html files are written to

    A page is skipped when its output is already up to date, except
    "index.txt", which is always rebuilt.
    """
    files = [f for f in files if ext(f) in options['extensions']]
    for f in files:
        inp = os.path.join(current_dir, f)
        target = os.path.join(output_dir, f)
        # TODO: Allow specifying the target extension from headers
        outp = os.path.splitext(target)[0] + '.html'
        # always redo the indexes since they'll typically list information to
        # update from the directory they are in
        if not source_newer(inp, outp) and f != "index.txt":
            continue
        headers, body = parse(inp)
        # A page may name its own template; otherwise the module-level
        # ``template`` is used (presumably the default template name from
        # config -- verify).
        blob = get_template(headers['template'] if 'template' in headers
                            else template)
        page_format = determine_format(headers)
        print("Processing %s" % inp)
        headers['content'] = content_format(current_dir, inp, headers, files,
                                            page_format, body)
        # Site options overwrite page headers that share a name.
        headers.update(options)
        # Pass the mapping itself: same substitutions as **headers, without
        # requiring every key to be usable as a keyword argument.
        output = blob.safe_substitute(headers)
        # The context manager closes the file even if the write fails.
        with open(outp, 'w') as outf:
            outf.write(output)
def main():
    """Build the whole site, then write the RSS feed.

    Walks input_dir, mirrors each directory under output_dir (creating it
    when missing) and renders its pages with parse_directory().
    """
    for root, dirs, files in os.walk(input_dir):
        # os.walk() yields paths that start with input_dir. Swap only that
        # leading occurrence so a sub-directory whose name happens to
        # contain input_dir is left alone.
        output = root.replace(input_dir, output_dir, 1)
        if not os.path.isdir(output):
            os.makedirs(output)
        parse_directory(root, files, output)
    # NOTE(review): the feed path is fixed to "output/" rather than derived
    # from output_dir -- confirm that this is intended.
    with open("output/feed.xml", "w") as feed:
        rss.write_xml(feed)
# Build the site only when this file is run as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment