Skip to content

Instantly share code, notes, and snippets.

@bigeyex
Created October 13, 2024 15:45
Show Gist options
  • Save bigeyex/683fb96b85f0385740e26e39cbac559c to your computer and use it in GitHub Desktop.
Save bigeyex/683fb96b85f0385740e26e39cbac559c to your computer and use it in GitHub Desktop.
from tkkit import *
from shutil import copytree
import fitz
from PIL import Image
import re
import os
# Single application instance; the callbacks in this file read and write
# form fields through it by name (get_value / set_value).
app = TKApp('My App')
def add_log(text):
    """Append ``text`` on a new line to the current value of the "log" field."""
    existing = app.get_value("log")
    app.set_value("log", existing + '\n' + text)
def _render_pdf_pages(pdf_path, output_folder, filename_prefix, file_index, is_cut_half):
    """Render every page of one PDF to PNG files inside ``output_folder``.

    Args:
        pdf_path: Path of the PDF to open.
        output_folder: Directory that receives the PNG files.
        filename_prefix: Text placed in front of the 4-digit number.
        file_index: Base number; page ``i`` (0-based) is numbered from it.
        is_cut_half: When true, wide pages are split into left/right halves.
    """
    # Render at 2x in both directions to get a sharper image.
    mat = fitz.Matrix(2.0, 2.0)
    # The context manager closes the document even when rendering fails.
    with fitz.open(pdf_path) as doc:
        for i in range(doc.page_count):
            page = doc[i]
            page_rect = page.bound()
            width = page_rect.width
            height = page_rect.height
            if is_cut_half and width / height > 0.7:
                # Width/height above 0.7: presumably a two-page spread, so it
                # is cut vertically down the middle into two images.
                left_rect = fitz.Rect(0, 0, width / 2, height)
                right_rect = fitz.Rect(width / 2, 0, width, height)
                # Left half is saved with suffix "B" under number file_index+i.
                page.set_cropbox(left_rect)
                page.get_pixmap(matrix=mat).pil_save(
                    os.path.join(output_folder, f"{filename_prefix}{file_index + i:04}B.png"))
                # Right half is saved with suffix "A" under the next number.
                page.set_cropbox(right_rect)
                page.get_pixmap(matrix=mat).pil_save(
                    os.path.join(output_folder, f"{filename_prefix}{file_index + i + 1:04}A.png"))
            else:
                # Splitting disabled or page not wide enough: save it whole.
                page.get_pixmap(matrix=mat).pil_save(
                    os.path.join(output_folder, f"{filename_prefix}{file_index + i + 1:04}.png"))


def process_files():
    """Convert the PDFs found under the chosen input folder into PNG images.

    Reads ``input_dir``, ``output_dir``, ``filename_pattern``, ``is_cut_half``
    and ``keep_folder_structure`` from the form, gathers the matching files in
    sorted order, and renders each PDF page by page. Progress lines are
    appended to the "log" field through ``add_log``.
    """
    filename_pattern = app.get_value("filename_pattern")
    is_cut_half = app.get_value("is_cut_half")
    keep_folder_structure = app.get_value("keep_folder_structure")
    input_root = app.get_value("input_dir")
    output_root = app.get_value("output_dir")
    add_log('开始处理')

    supported_suffixes = ('.png', '.tif', '.tiff', '.jpg', '.jpeg', '.pdf')
    # Build the file list first: copytree is convenient for walking the tree,
    # but it gives no guarantee about the order in which files are visited.
    files_to_process = []  # (input_dir, input_basename, output_file)

    def add_file(source_file, target_file):
        # Passed to copytree as copy_function: it records the file instead of
        # copying it. The extension test ignores case so "X.PDF" also matches.
        if not source_file.lower().endswith(supported_suffixes):
            return
        if filename_pattern == '' or re.search(filename_pattern, source_file) is not None:
            files_to_process.append(
                (os.path.dirname(source_file), os.path.basename(source_file), target_file))

    # copytree mirrors the directory tree under the output folder and calls
    # add_file for every file it encounters.
    copytree(input_root, output_root, copy_function=add_file, dirs_exist_ok=True)
    files_to_process.sort(key=lambda entry: (entry[0], entry[1]))

    output_folder = output_root
    last_dir = ''  # several folders can be processed in one run; remember the previous one
    volume_index = 0
    filename_prefix = ''
    for input_dir, input_basename, output_file in files_to_process:
        add_log(f'处理 {input_dir} {input_basename}')
        if keep_folder_structure:
            output_folder = os.path.dirname(output_file)
        if last_dir != input_dir:
            # Entering a new directory: restart the numbering.
            file_index = 1
            volume_index += 1
            if not keep_folder_structure:
                # Everything lands in one folder, so prefix the names with the
                # running directory number to keep them apart.
                filename_prefix = '{:04}_'.format(volume_index)
            last_dir = input_dir
        if input_basename.lower().endswith('.pdf'):
            _render_pdf_pages(
                os.path.join(input_dir, input_basename),
                output_folder,
                filename_prefix,
                file_index,
                is_cut_half,
            )
        # NOTE(review): the counter advances by one per file while a PDF uses
        # one number per page, so several multi-page PDFs in the same folder
        # can produce overlapping file names. Non-PDF files that were collected
        # are not converted here. Confirm the intended numbering.
        file_index += 1
def _build_window():
    """Assemble the form: folder pickers, filter box, options, button and log."""
    input_picker = FilePicker("选择pdf文件夹", dir=True, name="input_dir")
    output_picker = FilePicker("选择输出文件夹", dir=True, name="output_dir")
    # Optional filter; the value is read back under the name "filename_pattern".
    pattern_row = Row([
        Label('只处理文件名匹配规则的文件(可不填)'),
        TextBox('', name="filename_pattern"),
    ])
    cut_half_box = CheckBox('从中间切半页', name="is_cut_half")
    keep_structure_box = CheckBox('保持原有的文件夹结构', name="keep_folder_structure")
    convert_button = Button('转换', on_click=process_files)
    log_box = TextBox(name="log", lines=10)
    layout = Column(
        [
            input_picker,
            output_picker,
            pattern_row,
            cut_half_box,
            keep_structure_box,
            convert_button,
            log_box,
        ],
        padding=8,
    )
    return Window([layout])


window = _build_window()
app.show(window)
app.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment