btbytes · June 3, 2009 14:07
diff --git a/webgen_zs.py b/webgen_zs.py
 #!/usr/bin/env python
 from __future__ import with_statement

 import os
 import sys
 import string
 from string import Template
 from config import *
 from datetime import date
 from textile import textile
 from stat import *
 import datetime
 import PyRSS2Gen

 rss = PyRSS2Gen.RSS2(
    title = options["sitename"],
    link = options["siteurl"],
    description = options["slogan"],
    lastBuildDate = datetime.datetime.now(),
    items = [])


 def add_rss_item(rss, title, link, description):
       item = PyRSS2Gen.RSSItem(title = title, link = link,
         description = description,
         guid = PyRSS2Gen.Guid(link),
         pubDate = datetime.datetime(2003, 9, 6, 21, 31))
       rss.items.append(item)

 def ext(fname):
    return os.path.splitext(fname)[1]

 def process(fname):
    with open(fname, 'r') as f:
        try:
            head, body = f.read().split('\n\n')
            body
        except:
            print 'Invalid file format : ', fname

 def parse(fname):
    with open(fname, 'r') as f:
        raw = f.read()
        headers = {}
        try:
            (header_lines,body) = raw.split("\n\n", 1)
            for header in header_lines.split("\n"):
                (name, value) = header.split(": ", 1)
                headers[name.lower()] = unicode(value.strip())
            return headers, body
        except:
            raise TypeError, "Invalid page file format for %s" % fname

           
 def get_template(template):
    """Takes the directory where templates are located and the template name. Returns a blob containing the template."""
    template = os.path.join(template_dir, template)

    return Template(open(template, 'r').read())
       
 def source_newer(source, target):
    if not os.path.exists(target): 
        return True
    else:
        smtime = os.stat(source)[ST_MTIME]
        tmtime = os.stat(target)[ST_MTIME]
        return smtime > tmtime

 def is_blog(current_dir, myself, headers, files):
    """A page tagged as an entry will get the files, sort them by their dates,
    and then the contents will be that directory listing instead."""
    
    if 'content-type' in headers and headers['content-type'] == "text/blog":
        # it's a listing, make it all work
        without_self = files[:]
        without_self.remove(os.path.split(myself)[-1])
        without_self.sort(reverse=True)

        listing = []
        for f in without_self:
            print "Doing blog", f
            # load up the file and peel out the first few paragraphs
            content = os.path.join(current_dir, f)
            head, body = parse(content)
            paras = [p for p in body.split("\n\n") if p]
            if paras:
                # now make a simple listing entry with it
                date, ext = os.path.splitext(f)
                head["link"] = os.path.join(os.path.split(current_dir)[-1], date + ".html")
                head["date"] = date
                format = determine_format(head)
                head["content"] = content_format(current_dir, f, head, files,
                                                 format, "\n".join(paras[0:2]))
                description = get_template(headers['item-template']).safe_substitute(head)
                add_rss_item(rss, head["title"], options["siteurl"] + head["link"], description)
                listing.append(description)

        return lambda s: "".join(listing)
    else:
        return lambda s: s

 def content_format(current_dir, inp, headers, files, format, body):
    return {
            u'text/plain': lambda s: u'<pre>%s</pre>' % s,
            u'text/x-textile':  lambda s: u'%s' % textile(s,head_offset=0, validate=0, 
                                sanitize=0, encoding='utf-8', output='utf-8'),
            u'text/html': lambda s: s,
            u'text/blog': is_blog(current_dir, inp, headers, files)
        }[format](body)

 def determine_format(headers):
    if 'content-type' in headers:
        return headers['content-type']
    else:
        return options['format']

 def parse_directory(current_dir, files, output_dir):
    files = [f for f in files if ext(f) in options['extensions']]
    for f in files:
        inp = os.path.join(current_dir, f)
        target = os.path.join(output_dir, f)
        # TODO: Allow specifying the target extension from headers
        outp = os.path.splitext(target)[0] + '.html'

        # always redo the indexes since they'll typically list information to
        # update from the directory they are in
        if not source_newer(inp, outp) and f != "index.txt":
            continue

        headers, body = parse(inp)

        if 'template' not in headers:
            blob = get_template(template)
        else:
            blob = get_template(headers['template'])

        format = determine_format(headers)

        print "Processing %s" % inp

        content = content_format(current_dir, inp, headers, files, format, body)
        
        headers['content'] = content
        headers.update(options)
        output = blob.safe_substitute(**headers)

        outf = open(outp, 'w')
        outf.write(output)
        outf.close()

 def main():
    ### Walks through the input dir creating finding all subdirectories.
    for root, dirs, files in os.walk(input_dir):
        output = root.replace(input_dir, output_dir)
        ### Checks if the directory exists in output and creates it if false.
        if not os.path.isdir(output):
            os.makedirs(output)

        parse_directory(root, files, output)
        rss.write_xml(open("output/feed.xml", "w"))

    
 if __name__ == '__main__':
    main()
	#!/usr/bin/env python
	from __future__ import with_statement

	import os
	import sys
	import string
	from string import Template
	from config import *
	from datetime import date
	from textile import textile
	from stat import *
	import datetime
	import PyRSS2Gen

	rss = PyRSS2Gen.RSS2(
	title = options["sitename"],
	link = options["siteurl"],
	description = options["slogan"],
	lastBuildDate = datetime.datetime.now(),
	items = [])


	def add_rss_item(rss, title, link, description):
	item = PyRSS2Gen.RSSItem(title = title, link = link,
	description = description,
	guid = PyRSS2Gen.Guid(link),
	pubDate = datetime.datetime(2003, 9, 6, 21, 31))
	rss.items.append(item)

	def ext(fname):
	return os.path.splitext(fname)[1]

	def process(fname):
	with open(fname, 'r') as f:
	try:
	head, body = f.read().split('\n\n')
	body
	except:
	print 'Invalid file format : ', fname

	def parse(fname):
	with open(fname, 'r') as f:
	raw = f.read()
	headers = {}
	try:
	(header_lines,body) = raw.split("\n\n", 1)
	for header in header_lines.split("\n"):
	(name, value) = header.split(": ", 1)
	headers[name.lower()] = unicode(value.strip())
	return headers, body
	except:
	raise TypeError, "Invalid page file format for %s" % fname


	def get_template(template):
	"""Takes the directory where templates are located and the template name. Returns a blob containing the template."""
	template = os.path.join(template_dir, template)

	return Template(open(template, 'r').read())

	def source_newer(source, target):
	if not os.path.exists(target):
	return True
	else:
	smtime = os.stat(source)[ST_MTIME]
	tmtime = os.stat(target)[ST_MTIME]
	return smtime > tmtime

	def is_blog(current_dir, myself, headers, files):
	"""A page tagged as an entry will get the files, sort them by their dates,
	and then the contents will be that directory listing instead."""

	if 'content-type' in headers and headers['content-type'] == "text/blog":
	# it's a listing, make it all work
	without_self = files[:]
	without_self.remove(os.path.split(myself)[-1])
	without_self.sort(reverse=True)

	listing = []
	for f in without_self:
	print "Doing blog", f
	# load up the file and peel out the first few paragraphs
	content = os.path.join(current_dir, f)
	head, body = parse(content)
	paras = [p for p in body.split("\n\n") if p]
	if paras:
	# now make a simple listing entry with it
	date, ext = os.path.splitext(f)
	head["link"] = os.path.join(os.path.split(current_dir)[-1], date + ".html")
	head["date"] = date
	format = determine_format(head)
	head["content"] = content_format(current_dir, f, head, files,
	format, "\n".join(paras[0:2]))
	description = get_template(headers['item-template']).safe_substitute(head)
	add_rss_item(rss, head["title"], options["siteurl"] + head["link"], description)
	listing.append(description)

	return lambda s: "".join(listing)
	else:
	return lambda s: s

	def content_format(current_dir, inp, headers, files, format, body):
	return {
	u'text/plain': lambda s: u'<pre>%s</pre>' % s,
	u'text/x-textile': lambda s: u'%s' % textile(s,head_offset=0, validate=0,
	sanitize=0, encoding='utf-8', output='utf-8'),
	u'text/html': lambda s: s,
	u'text/blog': is_blog(current_dir, inp, headers, files)
	}[format](body)

	def determine_format(headers):
	if 'content-type' in headers:
	return headers['content-type']
	else:
	return options['format']

	def parse_directory(current_dir, files, output_dir):
	files = [f for f in files if ext(f) in options['extensions']]
	for f in files:
	inp = os.path.join(current_dir, f)
	target = os.path.join(output_dir, f)
	# TODO: Allow specifying the target extension from headers
	outp = os.path.splitext(target)[0] + '.html'

	# always redo the indexes since they'll typically list information to
	# update from the directory they are in
	if not source_newer(inp, outp) and f != "index.txt":
	continue

	headers, body = parse(inp)

	if 'template' not in headers:
	blob = get_template(template)
	else:
	blob = get_template(headers['template'])

	format = determine_format(headers)

	print "Processing %s" % inp

	content = content_format(current_dir, inp, headers, files, format, body)

	headers['content'] = content
	headers.update(options)
	output = blob.safe_substitute(**headers)

	outf = open(outp, 'w')
	outf.write(output)
	outf.close()

	def main():
	### Walks through the input dir creating finding all subdirectories.
	for root, dirs, files in os.walk(input_dir):
	output = root.replace(input_dir, output_dir)
	### Checks if the directory exists in output and creates it if false.
	if not os.path.isdir(output):
	os.makedirs(output)

	parse_directory(root, files, output)
	rss.write_xml(open("output/feed.xml", "w"))


	if __name__ == '__main__':
	main()