Last active
December 22, 2015 00:48
-
-
Save gabalese/6391443 to your computer and use it in GitHub Desktop.
Raw script to build a quick and dirty preview from an existing full EPUB. Useful to send out previews of existing ebooks.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python
# by Gabriele Alese <[email protected]> / http://www.alese.it
# Released into the Public Domain where applicable: http://creativecommons.org/publicdomain/zero/1.0/
# ***
# REQUIREMENTS:
# this script makes use of pyepub 2.0.9
# download here: https://github.com/gabalese/pyepub
# ***
# USAGE:
# $ python previewbuilder.py [files...]
# each output file keeps the input filename, with "preview_" prepended
import sys | |
try: | |
from pyepub import EPUB | |
except ImportError: | |
print "pyepub module not found: see https://github.com/gabalese/pyepub" | |
sys.exit(1) | |
import mimetypes | |
import os | |
from StringIO import StringIO | |
import xml.etree.ElementTree as ET | |
def _preview_item_count(spine_length, percent=20):
    """Return how many leading spine items the preview should contain.

    Integer arithmetic is used so the result does not depend on float
    rounding, and a non-empty spine always yields at least one item:
    otherwise a book with fewer than five spine items would truncate to
    zero and produce an empty preview.
    """
    if spine_length <= 0:
        return 0
    return max(1, spine_length * percent // 100)


def _is_external_reference(reference):
    """Tell whether *reference* is a URL or data: URI rather than a local file."""
    return "://" in reference or reference.startswith(("//", "data:"))


def _strip_root_folder(path, root_folder):
    """Return *path* without a leading ``root_folder + "/"`` prefix.

    Only a leading prefix is removed. An empty *root_folder* leaves *path*
    untouched instead of stripping every "/" from it.
    """
    prefix = root_folder + "/"
    if root_folder and path.startswith(prefix):
        return path[len(prefix):]
    return path


def _referenced_resources(epub, href, entities):
    """Return archive paths of the local resources used by one content document.

    Collects every ``src`` attribute plus the ``href`` of XHTML ``<link>``
    elements, resolved against the folder of the document itself.
    """
    document_path = os.path.join(epub.root_folder, href)
    base_dir = os.path.dirname(document_path)

    # A fresh parser per document: named HTML entities are registered so
    # that XHTML using them parses without fetching the DTD.
    parser = ET.XMLParser()
    parser.parser.UseForeignDTD(True)
    parser.entity.update(entities)
    tree = ET.parse(StringIO(epub.read(document_path)), parser)

    references = [node.attrib["src"] for node in tree.findall(".//*[@src]")]
    references += [
        node.attrib["href"]
        for node in tree.findall(".//{http://www.w3.org/1999/xhtml}link[@href]")
    ]
    # External references cannot name a file inside the archive; skip them
    # instead of trying to read a bogus path later on.
    return [
        os.path.normpath(os.path.join(base_dir, reference))
        for reference in references
        if not _is_external_reference(reference)
    ]


def previewfy(path):
    """Build a preview EPUB from the full EPUB at *path*.

    The preview contains the first 20% of the spine (at least one item for
    a non-empty spine) together with the resources those items reference
    through ``src`` attributes and ``<link href>`` elements. The result is
    written to the current working directory as ``"preview_" + basename``.

    :param path: filesystem path of the source EPUB.
    :returns: None; the preview is written to disk as a side effect.
    """
    # Python 2 stdlib module; imported once here rather than on every pass
    # through the parsing loop.
    from htmlentitydefs import entitydefs

    epub = EPUB(path, "r")

    # The preview is always the leading share of the spine; no lookup of a
    # guide entry with type="text" is performed.
    spine = epub.info["spine"]
    count = _preview_item_count(len(spine))
    itemslist = [
        manifest["href"]
        for item in spine[:count]
        for manifest in epub.info["manifest"]
        if item["idref"] == manifest["id"]
    ]

    fakefile = StringIO()
    output = EPUB(fakefile, "w", title=epub.info["metadata"]["title"],
                  language=epub.info["metadata"]["language"])

    # A set drops resources referenced from more than one document.
    resources = set()
    for href in itemslist:
        resources.update(_referenced_resources(epub, href, entitydefs))

    # add non-part manifest items (sorted so the output order is deterministic)
    for resource in sorted(resources):
        output.additem(epub.read(resource),
                       _strip_root_folder(resource, epub.root_folder),
                       mimetypes.guess_type(resource)[0])

    # add selected parts
    for href in itemslist:
        output.addpart(epub.read(os.path.join(epub.root_folder, href)), href,
                       "application/xhtml+xml")

    output.close()
    output.writetodisk("preview_" + os.path.basename(path))
if __name__ == '__main__':
    # Build one preview per EPUB path given on the command line.
    epub_paths = sys.argv[1:]
    for epub_path in epub_paths:
        previewfy(epub_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Please note: I know the files generated by this script do not pass epubcheck. Maybe I'll amend that, but don't hold your breath.