Created
June 3, 2009 14:07
-
-
Save btbytes/123004 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import with_statement | |
import os | |
import sys | |
import string | |
from string import Template | |
from config import * | |
from datetime import date | |
from textile import textile | |
from stat import * | |
import datetime | |
import PyRSS2Gen | |
# Module-level RSS feed. Blog entries are appended to it while pages are
# rendered, and main() writes it out at the end of the run.
rss = PyRSS2Gen.RSS2(
    title=options["sitename"],
    link=options["siteurl"],
    description=options["slogan"],
    lastBuildDate=datetime.datetime.now(),
    items=[],
)
def add_rss_item(rss, title, link, description, pub_date=None):
    """Append one entry to an RSS feed.

    rss         -- feed object whose ``items`` list receives the new entry.
    title       -- entry title.
    link        -- entry URL; it is also used as the entry's GUID.
    description -- entry body.
    pub_date    -- optional ``datetime.datetime`` publication date. When
                   omitted, the historical placeholder (2003-09-06 21:31)
                   is used, so existing callers see no change.
    """
    if pub_date is None:
        # Legacy placeholder kept for backward compatibility; callers that
        # know the real publication date should pass it explicitly.
        pub_date = datetime.datetime(2003, 9, 6, 21, 31)
    item = PyRSS2Gen.RSSItem(
        title=title,
        link=link,
        description=description,
        guid=PyRSS2Gen.Guid(link),
        pubDate=pub_date)
    rss.items.append(item)
def ext(fname):
    """Return the extension of *fname* including its leading dot ('' if none)."""
    _root, suffix = os.path.splitext(fname)
    return suffix
def process(fname):
    """Check that *fname* has a header block followed by a body.

    The file must contain at least one blank line separating the headers
    from the body. Nothing is returned; a message is printed when the check
    fails.
    """
    with open(fname, 'r') as f:
        raw = f.read()
    try:
        # Split on the first blank line only, as parse() does, so that a
        # body with several paragraphs is not reported as invalid.
        _head, _body = raw.split('\n\n', 1)
    except ValueError:
        # Unpacking failed: there is no blank line in the file.
        print('Invalid file format :  %s' % fname)
def parse(fname):
    """Read a page file and split it into headers and body.

    The file starts with ``Name: value`` header lines, then a blank line,
    then the body. Header names are lower-cased; values are stripped and
    converted to text.

    Returns a ``(headers, body)`` tuple where *headers* is a dict.
    Raises TypeError when the file does not follow that layout.
    """
    text_type = type(u"")  # ``unicode`` on Python 2, ``str`` on Python 3
    with open(fname, 'r') as f:
        raw = f.read()
    headers = {}
    try:
        header_lines, body = raw.split("\n\n", 1)
        for header in header_lines.split("\n"):
            name, value = header.split(": ", 1)
            headers[name.lower()] = text_type(value.strip())
    except ValueError:
        # Raised by a failed tuple unpacking (no blank line, or a header
        # without ": ") and by header values that cannot be decoded.
        raise TypeError("Invalid page file format for %s" % fname)
    return headers, body
def get_template(template):
    """Load a template by file name.

    *template* is joined onto the module-level ``template_dir`` and the
    file's contents are returned wrapped in a ``string.Template``.
    """
    path = os.path.join(template_dir, template)
    # A context manager closes the file handle promptly instead of leaving
    # it to the garbage collector.
    with open(path, 'r') as f:
        return Template(f.read())
def source_newer(source, target):
    """Tell whether *target* needs rebuilding from *source*.

    Returns True when *target* does not exist, or when the modification
    time of *source* is strictly later than that of *target*.
    """
    if os.path.exists(target):
        return os.stat(source)[ST_MTIME] > os.stat(target)[ST_MTIME]
    return True
def is_blog(current_dir, myself, headers, files):
    """Return the body renderer for a page, expanding blog listing pages.

    For a page whose 'content-type' header is "text/blog", every other file
    in *files* is parsed (in reverse-sorted name order), its first two
    paragraphs are rendered through the page's 'item-template', and each
    rendered entry is also added to the module-level RSS feed. The returned
    callable ignores its argument and yields the concatenated listing.

    For any other page the returned callable hands its argument back
    unchanged.
    """
    if 'content-type' not in headers or headers['content-type'] != "text/blog":
        return lambda s: s

    # It is a listing: work on a copy of the file list without the page itself.
    entries = list(files)
    entries.remove(os.path.split(myself)[-1])
    entries.sort(reverse=True)

    rendered = []
    for entry in entries:
        print("Doing blog %s" % entry)
        # Load the file and peel out its non-empty paragraphs.
        entry_path = os.path.join(current_dir, entry)
        head, body = parse(entry_path)
        paras = [p for p in body.split("\n\n") if p]
        if not paras:
            continue
        # The file name without its extension serves as both the entry's
        # date and the stem of its link.
        stem, _suffix = os.path.splitext(entry)
        parent_name = os.path.split(current_dir)[-1]
        head["link"] = os.path.join(parent_name, stem + ".html")
        head["date"] = stem
        fmt = determine_format(head)
        excerpt = "\n".join(paras[:2])
        head["content"] = content_format(current_dir, entry, head, files,
                                         fmt, excerpt)
        item_template = get_template(headers['item-template'])
        description = item_template.safe_substitute(head)
        add_rss_item(rss, head["title"], options["siteurl"] + head["link"],
                     description)
        rendered.append(description)
    return lambda s: "".join(rendered)
def content_format(current_dir, inp, headers, files, format, body):
    """Render *body* according to the content type named by *format*.

    Supported values are 'text/plain' (wrapped in <pre>), 'text/x-textile'
    (run through textile), 'text/html' (returned as is) and 'text/blog'
    (delegated to is_blog). Any other value raises KeyError.
    """
    def as_plain(s):
        return u'<pre>%s</pre>' % s

    def as_textile(s):
        return u'%s' % textile(s, head_offset=0, validate=0, sanitize=0,
                               encoding='utf-8', output='utf-8')

    def as_html(s):
        return s

    # is_blog() is called eagerly here, regardless of the requested format.
    renderers = {
        u'text/plain': as_plain,
        u'text/x-textile': as_textile,
        u'text/html': as_html,
        u'text/blog': is_blog(current_dir, inp, headers, files),
    }
    render = renderers[format]
    return render(body)
def determine_format(headers):
    """Return the page's 'content-type' header, or options['format'] if absent."""
    if 'content-type' not in headers:
        return options['format']
    return headers['content-type']
def parse_directory(current_dir, files, output_dir):
    """Render the source files of one directory into *output_dir*.

    current_dir -- directory holding the source files.
    files       -- names of the files in *current_dir*; only those whose
                   extension is listed in options['extensions'] are used.
    output_dir  -- directory receiving one '.html' file per source file.

    A file is skipped when its output is already up to date, except
    "index.txt", which is always regenerated.
    """
    files = [f for f in files if ext(f) in options['extensions']]
    for f in files:
        inp = os.path.join(current_dir, f)
        target = os.path.join(output_dir, f)
        # TODO: Allow specifying the target extension from headers
        outp = os.path.splitext(target)[0] + '.html'
        # Always redo the indexes since they'll typically list information
        # to update from the directory they are in.
        if not source_newer(inp, outp) and f != "index.txt":
            continue
        headers, body = parse(inp)
        if 'template' in headers:
            blob = get_template(headers['template'])
        else:
            # NOTE(review): `template` is not defined in this file;
            # presumably the default template name from `config` — verify.
            blob = get_template(template)
        fmt = determine_format(headers)
        print("Processing %s" % inp)
        headers['content'] = content_format(current_dir, inp, headers, files,
                                            fmt, body)
        # Site-wide options are merged last, so they override page headers
        # of the same name.
        headers.update(options)
        output = blob.safe_substitute(**headers)
        # The context manager closes the file even if the write fails.
        with open(outp, 'w') as outf:
            outf.write(output)
def main():
    """Walk ``input_dir``, render every directory, then write the RSS feed."""
    # Walks through the input dir finding all subdirectories.
    for root, dirs, files in os.walk(input_dir):
        # Swap only the leading input_dir for output_dir. str.replace would
        # also rewrite any later occurrence of the same text in the path
        # (with input_dir == "." it would replace every dot).
        output = output_dir + root[len(input_dir):]
        # Checks if the directory exists in output and creates it if false.
        if not os.path.isdir(output):
            os.makedirs(output)
        parse_directory(root, files, output)
    # NOTE(review): the feed path is hard-coded rather than derived from
    # output_dir — confirm this is intended.
    with open("output/feed.xml", "w") as feed:
        rss.write_xml(feed)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment