Last active
April 6, 2022 00:38
-
-
Save shimizukawa/22b2505f3df04ce95dd07a4f38ee02df to your computer and use it in GitHub Desktop.
EPUBをSphinxソースに変換する https://scrapbox.io/shimizukawa/EPUB%E3%82%92reStructuredText%E3%81%AB%E5%A4%89%E6%8F%9B%E3%81%99%E3%82%8B%E5%90%9B%E3%81%AE%E7%B4%B9%E4%BB%8B
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
import dataclasses | |
import typing | |
import re | |
import ebooklib | |
from ebooklib import epub | |
import html2text | |
import m2r | |
# Root directory for generated projects; each book gets its own sub-directory.
OUT_DIR = Path('./output')

# Template for the generated Sphinx ``conf.py``.  It is rendered with
# ``SPHINX_CONFIG.format(title)``.  The title is inserted with ``!r`` so that
# quotes or backslashes in a book title still produce a valid Python string
# literal (a plain ``'{}'`` would emit broken source for a title like "It's").
SPHINX_CONFIG = """
project = {!r}
copyright = '<YOUR COPYRIGHT>'
author = '<AUTHOR>'
release = version = '<VERSION>'
master_doc = 'sphinx'
extensions = [
]
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '.venv']
html_theme = 'bizstyle'
"""
@dataclasses.dataclass
class Heading:
    """One flattened table-of-contents entry."""

    # Section number path built by ``process_toc``: one 1-based index per
    # nesting level, e.g. ``(3, 2)``.  Variable length, hence ``Tuple[int, ...]``
    # (``Tuple[int]`` would mean "exactly one int").
    sec: typing.Tuple[int, ...]
    # The TOC node itself; ``main`` reads its ``href`` and ``title``.
    node: typing.Union[epub.Link, epub.Section]
def process_toc(toc, sec=()) -> typing.List[Heading]:
    """Flatten a nested table of contents into a list of ``Heading`` objects.

    :param toc: a list of entries, a tuple whose first two items are an entry
        and its children (presumably ebooklib's ``(Section, children)`` pair --
        confirm against the ebooklib docs), or a single leaf node.
    :param sec: section-number path accumulated so far.
    :return: headings in document order.
    """
    if isinstance(toc, tuple):
        # Both halves of the pair are processed with the same section path.
        parent, children = toc[0], toc[1]
        return process_toc(parent, sec) + process_toc(children, sec)

    if isinstance(toc, list):
        # Each list element gets its 1-based position appended to the path.
        headings = []
        for number, entry in enumerate(toc, start=1):
            headings.extend(process_toc(entry, sec=sec + (number,)))
        return headings

    # Anything else is treated as a leaf node.
    return [Heading(sec, toc)]
# Characters that must not appear in a generated file name: any whitespace,
# plus the characters that are path separators or invalid in Windows file
# names (? : * / \ < > | ").  Each one is replaced by a hyphen.
NORMALIZER = re.compile(r'[\s?:*/\\<>|"]')
# Runs of hyphens and EN DASH (U+2013) characters, collapsed to a single hyphen.
_HYPHEN_RUN = re.compile(r'[\u2013-]+')
# A hyphen directly following a dot ("1.-Intro" -> "1.Intro").
_DOT_HYPHEN = re.compile(r'\.-')


def normalize(name):
    """Turn a chapter title into a string usable as a file base name.

    Whitespace and file-system-unsafe characters become ``-``, consecutive
    hyphens / en dashes collapse into one ``-``, and a hyphen that directly
    follows a dot is removed.

    :param name: the chapter title.
    :return: the normalized name (without extension).
    """
    normalized = NORMALIZER.sub('-', name)
    normalized = _HYPHEN_RUN.sub('-', normalized)
    normalized = _DOT_HYPHEN.sub('.', normalized)
    return normalized
def _display_width(text):
    """Return the column width of *text*, counting East Asian wide and
    fullwidth characters as two columns each."""
    import unicodedata  # local import so the module's import header is untouched

    return sum(
        2 if unicodedata.east_asian_width(ch) in ('W', 'F') else 1
        for ch in text
    )


def _target_inside(base, name):
    """Return ``base / name``, refusing names that would land outside *base*."""
    target = base / name
    if base.resolve() not in target.resolve().parents:
        raise ValueError(f'Unsafe item name in EPUB: {name!r}')
    return target


def main(epub_filename):
    """Convert an EPUB file into a Sphinx project under ``OUT_DIR/<epub stem>``.

    For every table-of-contents entry the referenced document is converted to
    Markdown (html2text) and to reStructuredText (m2r).  Top-level entries
    start a new ``<title>.md`` / ``<title>.rst`` pair; deeper entries are
    appended to the files of the most recent top-level entry.  Images and the
    cover are then copied, and ``sphinx.rst`` (index with a toctree) and
    ``conf.py`` are generated.

    :param epub_filename: path of the EPUB file to convert.
    :raises RuntimeError: if a TOC entry's document cannot be found, or a
        sub-section appears before any top-level entry.
    :raises ValueError: if an EPUB item name points outside the output directory.
    :raises NotImplementedError: for an item type that is neither skipped nor
        an image/cover.
    """
    out_dir = OUT_DIR / Path(epub_filename).stem
    out_dir.mkdir(exist_ok=True, parents=True)

    book = epub.read_epub(epub_filename)
    heads = process_toc(book.toc)
    # If everything lives in a single top-level section, shift up one level.
    # Guarded so an empty TOC or a lone top-level link does not raise.
    if heads and heads[-1].sec[0] == 1 and isinstance(book.toc[0], tuple):
        heads = process_toc(book.toc[0][1])

    cover = ''
    names = []
    refs = set()
    md_path = rst_path = None

    # writing chapters
    for h in heads:
        href = h.node.href
        title = h.node.title
        item = book.get_item_with_href(href)
        if item is None:
            # Presumably an anchor ("file#frag") into a document that was
            # already handled -- skip it if the file part has been converted.
            if href.split('#', 1)[0] in refs:
                continue  # already converted
            raise RuntimeError(f"Can't get item for: {href} {title}")
        refs.add(href)

        basename = normalize(title)
        depth = len(h.sec)
        md = html2text.html2text(item.get_content().decode(), bodywidth=0)

        # heading level
        # partition() tolerates content without a newline (split() would raise).
        first_line, _, rest_lines = md.partition('\n')
        if depth == 1 and first_line.startswith('# '):
            # Chapter title: turn "Title / underline" into an
            # overline + title + underline heading.
            rst_first_line = m2r.convert(first_line)
            _t, _hr = rst_first_line.strip().split('\n')
            rst_first_line = f'{_hr}\n{_t}\n{_hr}\n'
        else:
            # Deepen the Markdown heading according to the TOC depth.
            first_line = '#' * (depth - 2) + first_line
            rst_first_line = m2r.convert(first_line)
        rst = rst_first_line + m2r.convert(rest_lines)

        if depth == 1:
            # A top-level entry starts a fresh pair of files ('w' truncates).
            md_path = out_dir / f'{basename}.md'
            rst_path = out_dir / f'{basename}.rst'
            mode = 'w'
            names.append(rst_path.stem)
            print('Writing ...', h.sec[0], href, rst_path)
        elif md_path is None:
            raise RuntimeError(
                f'Sub-section appears before any top-level section: {href} {title}')
        else:
            # Sub-sections are appended to the current chapter's files.
            mode = 'a'

        # writing MD
        with md_path.open(mode, encoding='utf-8') as f:
            f.write(md)
        # writing reST
        with rst_path.open(mode, encoding='utf-8') as f:
            f.write(rst)

    # writing rest of items
    for item in book.get_items():
        if item.file_name in refs:
            continue
        item_type = item.get_type()
        if item_type == ebooklib.ITEM_DOCUMENT:
            print('skip unprocessed document', item)
            continue
        elif item_type == ebooklib.ITEM_NAVIGATION:
            print('skip navigation file')
            continue
        elif item_type == ebooklib.ITEM_STYLE:
            print('skip style file')
            continue
        elif item_type == ebooklib.ITEM_FONT:
            print('skip font file')
            continue

        # Item names come from the EPUB; never write outside out_dir.
        file = _target_inside(out_dir, item.get_name())
        file.parent.mkdir(exist_ok=True, parents=True)
        if item_type == ebooklib.ITEM_IMAGE:
            print('Writing ...', file)
            file.write_bytes(item.get_content())
        elif item_type == ebooklib.ITEM_COVER:
            # Keep the real image extension; fall back to .png if there is none.
            file = file.with_name('cover' + (file.suffix or '.png'))
            print('Writing ...', file)
            file.write_bytes(item.get_content())
            # conf.py wants a forward-slash path regardless of platform.
            cover = file.relative_to(out_dir).as_posix()
        else:
            raise NotImplementedError('Unknown Type: %s' % item)

    # writing sphinx index
    # The title bars must be at least as wide as the title's display width.
    bar = '=' * _display_width(book.title)
    with (out_dir / 'sphinx.rst').open('w', encoding='utf-8') as f:
        f.write(bar + '\n')
        f.write(f'{book.title}\n')
        f.write(bar + '\n')
        f.write('\n')
        f.write('.. toctree::\n')
        f.write('\n')
        f.write(''.join([f' {name}\n' for name in names]))

    # writing sphinx config
    # Python source files are UTF-8 by default, so write conf.py as UTF-8.
    with (out_dir / 'conf.py').open('w', encoding='utf-8') as f:
        f.write(SPHINX_CONFIG.format(book.title))
        if cover:
            f.write(f'html_logo = {cover!r}\n')
if __name__ == '__main__':
    import sys

    # Exit with a usage hint instead of an IndexError when no file is given.
    if len(sys.argv) < 2:
        sys.exit(f'usage: python {sys.argv[0]} <book.epub>')
    main(sys.argv[1])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(.venv) > python converter.py EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896.epub | |
Writing ... 1 c45552a6-d4a0-43a1-a46d-8b077d65114a.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Title-Page.rst | |
Writing ... 2 8f76a324-86ff-47be-b6f2-c97a1653c271.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Copyright-and-Credits.rst | |
Writing ... 3 e93c552e-76d3-46f5-a489-debcabd838ff.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Dedication.rst | |
Writing ... 4 90e66f3a-32c8-40c7-9511-e94b315162cb.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/About-Packt.rst | |
Writing ... 5 add088be-8cdc-473d-b804-5900cba446b1.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Contributors.rst | |
Writing ... 6 a5965442-971b-4aeb-855a-9b3d0efa38b9.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Preface.rst | |
Writing ... 7 b8f395bd-1bf2-4bac-9072-6c170cd1bf2d.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Section-1-Before-You-Start.rst | |
Writing ... 8 fc34a3fb-f668-423e-9251-92f3f46f06d1.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Current-Status-of-Python.rst | |
Writing ... 9 c3f8e8d6-6134-4264-b384-fc50a02a5a0a.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Modern-Python-Development-Environments.rst | |
Writing ... 10 847e77a3-6560-41eb-9cdf-51b13511727a.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Section-2-Python-Craftsmanship.rst | |
Writing ... 11 f0d4715b-ef6e-4d14-b8a9-9c71913a38d0.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Modern-Syntax-Elements-Below-the-Class-Level.rst | |
Writing ... 12 261a3e3d-f924-40d5-95ef-bcfb39ba751b.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Modern-Syntax-Elements-Above-the-Class-Level.rst | |
Writing ... 13 7e761866-2637-4e14-8fa6-93bbda555a91.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Elements-of-Metaprogramming.rst | |
Writing ... 14 3863a217-5970-4ea5-b1a6-78509517a1b6.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Choosing-Good-Names.rst | |
Writing ... 15 65a3a432-ff05-4b32-9f03-9a4ab9a2bf1e.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Writing-a-Package.rst | |
Writing ... 16 6b9a94bc-5f99-49b4-a1fc-000f0a941eae.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Deploying-the-Code.rst | |
Writing ... 17 8286b1c7-b669-4b55-9441-1316e6820925.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Python-Extensions-in-Other-Languages.rst | |
Writing ... 18 fcabbbc9-0960-494e-92b0-c9146832e04c.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Section-3-Quality-over-Quantity.rst | |
Writing ... 19 8f31752f-8519-435c-b980-3598302185d2.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Managing-Code.rst | |
Writing ... 20 17633700-147b-4cd2-847e-2c0c0041e6fa.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Documenting-Your-Project.rst | |
Writing ... 21 de2d943f-73d3-4c50-8164-d3e1902f8f4a.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Test-Driven-Development.rst | |
Writing ... 22 febc1b7b-38eb-4130-937c-f5dba7265359.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Section-4-Need-for-Speed.rst | |
Writing ... 23 f88ad4e5-495e-45b2-a0e0-723d649026b6.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Optimization-Principles-and-Profiling-Techniques.rst | |
Writing ... 24 6f642a0b-3ed1-42d4-8bf9-b24d5674cd63.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Optimization-Some-Powerful-Techniques.rst | |
Writing ... 25 76ceb1e8-c58f-4d30-b778-b18ea34f7dd6.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Concurrency.rst | |
Writing ... 26 e6519caf-824e-422f-b92a-d001a8fe6c23.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Section-5-Technical-Architecture.rst | |
Writing ... 27 03cb5aa7-33fd-48cd-b735-c5de6e50b7ad.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Event-Driven-and-Signal-Programming.rst | |
Writing ... 28 77e7750c-61c8-404f-ae6a-e0b4c615cac5.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Useful-Design-Patterns.rst | |
Writing ... 29 1ef29cdc-f110-4dd3-992f-81fbbd066314.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/reStructuredText-Primer.rst | |
Writing ... 30 b41673c4-025b-4ab6-b3d0-e75dc827c03b.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Other-Books-You-May-Enjoy.rst | |
skip unprocessed document <EpubNav:nav:toc.xhtml> | |
skip navigation file | |
skip style file | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/cover.png | |
skip unprocessed document <EpubHtml:cover:cover.xhtml> | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/543c151d-e8a7-4179-bd10-6505abe50fee.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/1e2e68c9-e2ca-453e-a961-ee25050ddba5.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/bd2c0520-12be-4f25-ba11-ad040526de22.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/b4cdabc5-d7e4-4d31-a4db-ba7f967da5a5.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/be2295e4-dfdb-4002-a182-a4ed1a458140.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/6d749875-b47a-4d69-9393-21bf70dd6df2.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/e265aa5b-8a02-47da-906b-3f196372220f.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/78e369c4-f3ed-4d80-829a-099a4e60cab3.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/781952c3-4355-4995-ae2a-00df052830fd.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/e8606fc8-7898-423b-a167-d2484879c89b.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/315bffc1-0003-4d35-8701-fd8334315c4b.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/2cb385f6-554a-4bd9-b4c9-e5f74fdba446.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/d25f4da6-ec9b-47dd-b42a-015d45065e66.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/465a2a53-dcff-416e-a794-05c0d11eb4f3.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/422d1e61-b70a-4cb8-9603-e7d63debd50f.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/8d1614d9-68b1-4a73-a66a-fcb5e24e025f.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/9bb3759a-c1dd-4dd5-a295-607f982ab575.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/84cc8f8c-a654-4ded-9c37-e68300f4b266.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/69dbbdf1-6b10-4661-8f7c-ff529f30ee41.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/8312890a-eda7-4f78-92df-082187e0c96e.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/d28842d5-0249-4b7a-8bd0-fba3aa13e9ea.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/4e8905eb-9467-42b4-97f1-fcf592d85d9a.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/48fbcdab-523b-4103-a439-8ba3f4722119.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/2f9f2e8a-5181-4aaf-bf04-c0643627fe24.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/804d12e0-5d34-4335-ab50-aa332cdd00d9.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/d6dd77f5-df0b-4e2f-bb1c-37e85b042eda.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/4dd1064a-b7e7-41b2-af66-77ad3656ebb1.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/10264766-b20a-4572-86aa-ce247a85a099.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/85a843aa-300f-42ff-9d89-e282b453910d.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/5f392a78-61b5-4910-bc9f-9bf825b18860.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/a4ccc16f-0a01-45ba-b83b-2a83ca174f7f.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/2fa84c7b-242d-46a8-b5fe-aced5f0d45b5.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/d95377ce-f0be-4703-a93a-72a6bbca669e.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/e75ad983-416d-4c7a-b4e8-43d5d0c01d36.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/c8d76b24-9d53-406b-9d76-1d21614a5fe7.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/a1c04688-0018-4163-8a0d-4c7e24e1378c.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/85e249c3-2dfd-4772-bc5b-49fbdb2b714d.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/40d83a1e-ddca-4879-a6c5-1b52c7cce55a.png | |
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/78f4ee59-bcca-4061-bece-284f4b23d03d.png |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
docutils==0.16 | |
EbookLib==0.17.1 | |
html2text==2020.1.16 | |
lxml==4.5.0 | |
m2r==0.2.1 | |
mistune==0.8.4 | |
six==1.14.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
EbookLib | |
html2text | |
m2r |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment