Skip to content

Instantly share code, notes, and snippets.

@dirkk0
Created September 9, 2013 14:43
Show Gist options
  • Select an option

  • Save dirkk0/6496579 to your computer and use it in GitHub Desktop.

Select an option

Save dirkk0/6496579 to your computer and use it in GitHub Desktop.
test.py
#!/usr/bin/env python
# coding: utf8
from docx import *
import urllib2
# filename = "template.docx"
# document = opendocx(filename)
# Start from an empty document.  `relationships` is later passed to
# picture() and wordrelationships().
document = newdocument()
relationships = relationshiplist()
# All generated content is appended to the document's <w:body> element.
body = document.xpath('/w:document/w:body', namespaces=nsprefixes)[0]
# One page reference per line of titles.txt.  splitlines() keeps the final
# entry even when the file has no trailing newline, and the `with` block
# closes the file handle.
with open("titles.txt", 'r') as titles_file:
    pages = titles_file.read().splitlines()
# Collect a display title for every page: the first line of its text file
# with the "======" markers and one surrounding character on each side
# removed.
titles = {}
for t in pages:
    # Drop two characters on either side of the entry
    # (presumably the "[[" / "]]" of a wiki link -- confirm against titles.txt).
    pagetitle = t[2:-2]
    filename = "pages/" + pagetitle.lower() + ".txt"
    try:
        with open(filename, 'r') as page_file:
            page_lines = page_file.read().splitlines()
        first_line = page_lines[0]
    except (IOError, IndexError):
        # Missing, unreadable or empty page file: fall back to the
        # placeholder title ("leer" is German for "empty").
        titles[pagetitle] = "leer"
        continue
    titles[pagetitle] = first_line.replace("======", '')[1:-1]
    print(pagetitle)
# Only a hard-coded window of the title list is converted.
# NOTE(review): the 17:19 slice looks like a debugging leftover -- confirm
# before using this script for a full export.
pages = pages[17:19]
import sys
#sys.exit()
# Set to False to leave out lines that start with "//".
showComments = True

# Heading markers and the heading level each one is rendered with.  Longest
# marker first, because a line containing "======" also contains "===".
HEADING_MARKERS = (("======", 2), ("=====", 3), ("====", 4), ("===", 5))


def _match_heading(line):
    """Return ``(text, level)`` if *line* contains a heading marker, else None."""
    for marker, level in HEADING_MARKERS:
        if marker in line:
            return line.replace(marker, ''), level
    return None


def _to_unicode(text):
    """Return *text* as a unicode object, decoding byte strings as UTF-8."""
    if isinstance(text, unicode):
        return text
    return unicode(text, "utf-8", "replace")


def _local_picture(target):
    """Return a local file name for *target*, downloading it if it is a URL.

    A leading ":" is dropped.  A target starting with "http" is fetched with
    urllib2 and stored in the current directory under the last path segment
    of the URL; any other target is returned unchanged.
    """
    if target.startswith(":"):
        target = target[1:]
    if target[0:4] == "http":
        picfilename = target.split('/')[-1]
        picfile = urllib2.urlopen(target)
        try:
            with open(picfilename, 'wb') as output:
                output.write(picfile.read())
        finally:
            picfile.close()
        target = picfilename
    return target


# Decode the title table once so that it can be applied to unicode text
# without an implicit ASCII conversion.  Empty keys are skipped because
# replacing "" would insert the display title between every character.
unicode_titles = [(_to_unicode(key), _to_unicode(value))
                  for key, value in titles.items() if key]

for f in pages:
    filename = "pages/" + f[2:-2].lower() + ".txt"
    try:
        with open(filename, 'r') as page_file:
            page_lines = page_file.read().splitlines()
    except IOError:
        print(filename + " not found.")
        continue
    try:
        for l in page_lines:
            heading_match = _match_heading(l)
            if heading_match is not None:
                text, level = heading_match
                body.append(heading(text, level))
            elif l.startswith("{{"):
                # Embedded picture: keep what precedes the first "|" and
                # split the target from whatever follows a "?".
                spec = l.replace("{{", '').replace("}}", '').strip()
                parts = spec.split('|')[0].split('?')
                target = _local_picture(parts[0])
                options = parts[1] if len(parts) > 1 else ''
                if not target:
                    # Empty embed such as "{{}}": nothing to insert.
                    continue
                relationships, picpara = picture(relationships, target, options)
                body.append(picpara)
            elif l.startswith("//"):
                # Comment line, rendered in italics when enabled.
                if showComments:
                    body.append(paragraph([(l, 'i')]))
            elif l.startswith('  *'):
                # First-level bullet.  The marker must be three characters
                # long to match the three-character prefix being tested.
                body.append(paragraph(l[4:], style='ListBullet'))
            elif l.startswith('    *'):
                # Second-level bullet (five-character prefix).
                body.append(paragraph('-->' + l[6:], style='Liste2'))
            elif " " in l[0:4]:
                # NOTE(review): this matches any line with a space among its
                # first four characters; the literal may originally have
                # been wider -- confirm the intended code-line marker.
                body.append(paragraph("." + l, style='EUCode'))
            else:
                # Plain text: swap page titles for their display titles.
                c = unicode(l, "utf-8")
                for wiki_title, display_title in unicode_titles:
                    c = c.replace(wiki_title, display_title)
                body.append(paragraph([(c, '')]))
        # Each successfully converted page ends with a page break.
        body.append(pagebreak(type='page', orient='portrait'))
    except Exception as err:
        # Best effort: give up on this page, say why, and go on with the
        # next one.  Content appended before the failure stays in place.
        print(filename + " could not be converted: " + str(err))
# Create the document properties, content types and other support files,
# then write everything to a single .docx file.
# NOTE(review): the metadata values below look like placeholders carried
# over from a demo -- confirm before distributing the output.
title = 'Python docx demo'
subject = 'A practical example of making docx from Python'
creator = 'Mike MacCana'
keywords = ['python', 'Office Open XML', 'Word']
output_name = 'test2.docx'
coreprops = coreproperties(title=title, subject=subject, creator=creator,
                           keywords=keywords)
appprops = appproperties()
# Bind the generated parts to their own names instead of rebinding the
# helper functions that produce them.
contenttypes_part = contenttypes()
websettings_part = websettings()
wordrelationships_part = wordrelationships(relationships)
savedocx(document, coreprops, appprops, contenttypes_part, websettings_part,
         wordrelationships_part, output_name)
# Hand the result to the "open" command.
# NOTE(review): "open" is presumably the macOS launcher -- confirm for
# other platforms.
from subprocess import call
call(["open", output_name])
# unzip test.docx
# find . | zip file.zip -@
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment