Created
July 10, 2023 03:20
-
-
Save muxueqz/d8f00fbd24c37bf8d4f2a665add9408c to your computer and use it in GitHub Desktop.
make pdf index/bookmark with pikepdf
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import pprint
import re
import sys

from pikepdf import Pdf, OutlineItem
def parse_bookmarks(text, offset=20, start_marker="第一部分"):
    """Extract ``[title, page]`` bookmark entries from table-of-contents text.

    Each entry is a run of digits (the printed page number), whitespace, and
    the rest of the line (the title), for example::

        007 第1章 不是咆哮,是低语
        011 第2章 认识自己

    From the first title containing *start_marker* onward (that entry
    included), *offset* is added to the page number; entries before it keep
    their number unchanged.

    Args:
        text: Raw table-of-contents text.
        offset: Amount added to page numbers once *start_marker* was seen.
        start_marker: Substring of the title that switches the offset on.

    Returns:
        A list of ``[title, page]`` lists in input order.
    """
    bookmarks = []
    shifted = False
    for page_str, title in re.findall(r"(\d+)\s+(.*)", text):
        # "." also matches "\r", so CRLF input would otherwise leave a
        # carriage return at the end of every title.
        title = title.rstrip()
        if start_marker in title:
            shifted = True
        page = int(page_str)
        if shifted:
            page += offset
        bookmarks.append([title, page])
    return bookmarks


def build_outline(outline, bookmarks, part_marker="部分"):
    """Replace the contents of *outline* with a two-level bookmark tree.

    A title containing *part_marker* becomes a top-level item; every
    following entry is nested under the most recent such item. Entries that
    appear before the first part stay at the top level.

    Args:
        outline: An open pikepdf outline (as yielded by ``Pdf.open_outline``).
        bookmarks: Iterable of ``[title, page]`` pairs. The page is handed to
            ``OutlineItem`` as-is (the original script noted that these page
            numbers are zero-based).
        part_marker: Substring identifying a top-level "part" title.
    """
    root = outline.root
    root.clear()
    current_part = None
    for title, page in bookmarks:
        item = OutlineItem(title, page)
        if part_marker in title:
            current_part = item
            root.append(item)
        elif current_part is not None:
            current_part.children.append(item)
        else:
            root.append(item)


def main(argv=None):
    """Read a table of contents from stdin and write it into a PDF outline.

    Usage: ``script INPUT.pdf [OUTPUT.pdf] < toc.txt``. The result is saved
    to ``output.pdf`` unless an output path is given as second argument.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Fail before consuming stdin instead of with a bare IndexError later.
        sys.exit(
            "usage: {} INPUT.pdf [OUTPUT.pdf] < toc.txt".format(
                argv[0] if argv else "make_pdf_outline"
            )
        )
    output_path = argv[2] if len(argv) > 2 else "output.pdf"

    bookmarks = parse_bookmarks(sys.stdin.read())
    # Echo the parsed entries so the user can check them against the book.
    pprint.pprint(bookmarks)

    with Pdf.open(argv[1]) as pdf:
        with pdf.open_outline() as outline:
            build_outline(outline, bookmarks)
        pdf.save(output_path)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment