Skip to content

Instantly share code, notes, and snippets.

@twlz0ne
Forked from scturtle/txt2epub.py
Created May 30, 2020 09:06
Show Gist options
  • Save twlz0ne/f0550f95d10a3a27b6aaafb3dbc80117 to your computer and use it in GitHub Desktop.
Save twlz0ne/f0550f95d10a3a27b6aaafb3dbc80117 to your computer and use it in GitHub Desktop.
txt to epub
# coding: utf-8
import os
# Book metadata; these values are written into the OPF package file and
# the NCX table of contents.
title = 'test book'
creator = 'scturtle'
description = 'blablablabla'
# Chapter source files. Each entry is converted to one XHTML document and
# the list order becomes the spine / navigation order.
txtlist = [
    '1.txt',
    '2.txt',
]
# Temporary working directory that mirrors the layout of the EPUB archive.
# os.mkdir raises OSError when 'tmp' already exists, so leftovers from an
# earlier run are never silently packaged into the new book.
os.mkdir('tmp')
# tmp/mimetype: media-type marker of the container (no trailing newline).
# open() + with replaces the Python-2-only file() builtin and guarantees the
# handle is closed even if the write fails.
with open('tmp/mimetype', 'w') as tf:
    tf.write('application/epub+zip')
# tmp/META-INF directory
os.mkdir('tmp/META-INF')
# tmp/META-INF/container.xml tells the reader where the OPF package file is.
with open('tmp/META-INF/container.xml', 'w') as tf:
    tf.write('''<?xml version="1.0" encoding="UTF-8" ?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles> <rootfile full-path="OPS/content.opf" media-type="application/oebps-package+xml"/> </rootfiles>
</container>
''')
# tmp/OPS directory: the actual book content lives here.
os.mkdir('tmp/OPS')
# Cover image (optional): copied into the package only when a cover.jpg
# exists in the current working directory.
if os.path.isfile('cover.jpg'):
    import shutil
    shutil.copyfile('cover.jpg', 'tmp/OPS/cover.jpg')
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print('Cover found!')
# Prepare the OPF package template: metadata, manifest (chapters, NCX and
# cover) and spine. The placeholders %(metadata)s, %(manifest)s and %(ncx)s
# are filled in later when content.opf is written.
from xml.sax.saxutils import escape

# Only declare the cover when it was actually copied into the package;
# a manifest entry pointing at a missing file makes the book invalid.
if os.path.isfile('tmp/OPS/cover.jpg'):
    cover_meta = '<meta name="cover" content="cover"/>\n'
    cover_item = '<item id="cover" href="cover.jpg" media-type="image/jpeg"/>\n'
else:
    cover_meta = ''
    cover_item = ''

# NOTE(review): the package declares unique-identifier="PrimaryID" but no
# <dc:identifier id="PrimaryID"> is emitted below -- confirm whether one
# should be added.
opfcontent = ('''<?xml version="1.0" encoding="UTF-8" ?>
<package version="2.0" unique-identifier="PrimaryID" xmlns="http://www.idpf.org/2007/opf">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
%(metadata)s
''' + cover_meta + '''</metadata>
<manifest>
%(manifest)s
<item id="ncx" href="content.ncx" media-type="application/x-dtbncx+xml"/>
''' + cover_item + '''</manifest>
<spine toc="ncx">
%(ncx)s
</spine>
</package>
''')
# Templates for one Dublin Core element, one manifest item and one spine
# reference.
dc = '<dc:%(name)s>%(value)s</dc:%(name)s>'
item = "<item id='%(id)s' href='%(url)s' media-type='application/xhtml+xml'/>"
itemref = "<itemref idref='%(id)s'/>"
# Escape &, < and > so arbitrary book information cannot break the XML.
metadata = '\n'.join([
    dc % {'name': 'title', 'value': escape(title)},
    dc % {'name': 'creator', 'value': escape(creator)},
    dc % {'name': 'description', 'value': escape(description)},
])
from xml.sax.saxutils import escape

# Manifest <item> and spine <itemref> entries, collected per chapter and
# joined into strings once every chapter has been converted.
manifest = []
ncx = []
# Generate the content of every chapter: txt => xhtml, one paragraph per
# input line. The chapter file name doubles as title, id and output name.
# NOTE(review): files are opened in default text mode; on Python 3 that
# means the locale encoding -- confirm the chapter files match it.
for txt in txtlist:
    # Context manager closes the source file (the original never did).
    with open(txt, 'r') as src:
        content = src.read().split('\n')
    # Escape &, < and > so chapter text cannot produce malformed markup.
    content = ['<div><p>%s</p></div>' % escape(line) for line in content]
    with open('tmp/OPS/%s.xhtml' % txt, 'w') as tf:
        tf.write('''<html><head>
<title>%(title)s</title>
</head><body><h2>%(title)s</h2><div>
''' % {'title': escape(txt)})
        tf.write('\n'.join(content))
        tf.write('</div></body></html>')
    manifest.append(item % {'id': txt, 'url': txt + '.xhtml'})
    ncx.append(itemref % {'id': txt})
manifest = '\n'.join(manifest)
ncx = '\n'.join(ncx)
# Write the OPF package file.
with open('tmp/OPS/content.opf', 'w') as tf:
    tf.write(opfcontent % {
        'metadata': metadata,
        'manifest': manifest,
        'ncx': ncx,
    })
# Prepare the NCX (table of contents) document. %(title)s, %(creator)s and
# %(navpoints)s are substituted when the file is written below.
from xml.sax.saxutils import escape

ncx = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
<ncx version="2005-1" xmlns="http://www.daisy.org/z3986/2005/ncx/">
<head>
<meta name="dtb:uid" content=" "/>
<meta name="dtb:depth" content="-1"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle><text>%(title)s</text></docTitle>
<docAuthor><text>%(creator)s</text></docAuthor>
<navMap>
%(navpoints)s
</navMap>
</ncx>
'''
# One navigation entry: (id, 1-based play order, label, target document).
navpoint = '''<navPoint id='%s' class='level1' playOrder='%d'>
<navLabel> <text>%s</text> </navLabel>
<content src='%s'/></navPoint>'''
# One entry per chapter, in txtlist order. Only the visible label is
# escaped; id and src are kept as-is so they still match the generated
# file names.
navpoints = [
    navpoint % (txt, i + 1, escape(txt), txt + '.xhtml')
    for i, txt in enumerate(txtlist)
]
# Write the table-of-contents file; title and creator are escaped so that
# &, < and > in the book information cannot break the XML.
with open('tmp/OPS/content.ncx', 'w') as tf:
    tf.write(ncx % {
        'title': escape(title),
        'creator': escape(creator),
        'navpoints': '\n'.join(navpoints),
    })
# Package the working directory into book.epub.
import shutil
from zipfile import ZipFile

# The with-block closes the archive even if adding a file fails. ZipFile's
# default compression is ZIP_STORED, so every entry is stored uncompressed.
with ZipFile('book.epub', 'w') as epubfile:
    # Add the mimetype entry explicitly as the very first archive member
    # instead of relying on the order in which os.walk lists files.
    epubfile.write(os.path.join('tmp', 'mimetype'), 'mimetype')
    for d, ds, fs in os.walk('tmp'):
        ds.sort()  # deterministic traversal order
        for f in sorted(fs):
            path = os.path.join(d, f)
            # Archive names are relative to 'tmp'; passing arcname avoids
            # changing the process working directory with os.chdir.
            arcname = os.path.relpath(path, 'tmp')
            if arcname == 'mimetype':
                continue  # already written above
            epubfile.write(path, arcname)
# 'tmp' is only a temporary working directory; remove it so the script can
# be run again (os.mkdir('tmp') fails when the directory already exists).
shutil.rmtree('tmp')
print('Finished!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment