Skip to content

Instantly share code, notes, and snippets.

@whym
Created October 21, 2017 08:40
Show Gist options
  • Save whym/d6b30a9994b5ab470e8b4b7e869419cb to your computer and use it in GitHub Desktop.
Save whym/d6b30a9994b5ab470e8b4b7e869419cb to your computer and use it in GitHub Desktop.
Some EPUB readers ignore the HR element in HTML. This program works around that by replacing every HR with '* * *'.
#! /usr/bin/env python
# -*- coding:utf-8 -*-
"""Some EPUB readers ignore the HR element in HTML. This program works around that by replacing every HR with '* * *'. (Both the matched pattern and the replacement text are customizable.)"""
import sys
import argparse
from ebooklib import epub
import re
import os
def match_context(content, start, end, width):
    """Return the span ``start:end`` of *content* plus *width* units each side.

    The left edge is clamped at 0. Without the clamp, a match closer than
    *width* to the beginning yields a negative slice start, which Python
    interprets as an offset from the end and so returns an empty or
    unrelated snippet.
    """
    return content[max(0, start - width):end + width]


def replace_hrs(content, pattern, replacement, context=20, label=None):
    """Return *content* with every match of *pattern* replaced.

    Args:
        content: document text, either ``str`` or ``bytes``.
        pattern: compiled regular expression (normally built from ``str``).
        replacement: text substituted for each match. It is inserted
            literally; backslash/group references are not expanded.
        context: number of surrounding characters shown per reported match.
        label: when not ``None``, each match is printed to stdout as
            ``label`` followed by the match with its surrounding context.

    Returns:
        The substituted content, of the same type as *content*.
    """
    if isinstance(content, bytes):
        # A str pattern cannot be applied to bytes content, so derive bytes
        # equivalents of the pattern and replacement. re.UNICODE is implied
        # for str patterns but is rejected for bytes patterns, so mask it out.
        if not isinstance(pattern.pattern, bytes):
            pattern = re.compile(pattern.pattern.encode('utf-8'),
                                 pattern.flags & ~int(re.UNICODE))
        if not isinstance(replacement, bytes):
            replacement = replacement.encode('utf-8')

    def substitute(match):
        if label is not None:
            print(label,
                  match_context(content, match.start(), match.end(), context))
        return replacement

    return pattern.sub(substitute, content)


def main(argv=None):
    """Parse the command line and rewrite the HR elements of each EPUB file.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true',
                        default=False,
                        help='turn on verbose message output')
    parser.add_argument('-O', '--overwrite', action='store_true',
                        default=False,
                        help='write changes back to the input file instead '
                             'of a "_fixed" copy')
    parser.add_argument('-n', '--dryrun', action='store_true',
                        default=False,
                        help='report the files that would be written '
                             'without writing them')
    parser.add_argument('-c', '--context', type=int,
                        default=20,
                        help='characters of context shown around each match '
                             'in verbose mode')
    parser.add_argument('-p', '--pattern', type=str,
                        default='<hr.*?>',
                        help='regular expression to replace '
                             '(matched case-insensitively)')
    parser.add_argument('-r', '--replace', type=str,
                        default='<p style="text-align: center;">* * *</p>',
                        help='text inserted in place of each match')
    parser.add_argument('files', nargs='+')
    args = parser.parse_args(argv)
    if args.verbose:
        sys.stderr.write("%s\n" % (args,))

    hrpat = re.compile(args.pattern, re.IGNORECASE)
    for fname in args.files:
        book = epub.read_epub(fname)
        changed = False
        for item in book.get_items():
            if not isinstance(item, epub.EpubHtml):
                continue
            newcontent = replace_hrs(item.content, hrpat, args.replace,
                                     context=args.context,
                                     label=fname if args.verbose else None)
            if newcontent != item.content:
                item.content = newcontent
                changed = True
        if not changed:
            continue

        if args.overwrite:
            newname = fname
        else:
            base, ext = os.path.splitext(fname)
            newname = base + '_fixed' + ext
        if not args.dryrun:
            epub.write_epub(newname, book, {})
        # Printed in dry-run mode as well, so the user sees what would be
        # written.
        print(newname)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment