bigeyex · October 13, 2024 15:45
diff --git a/pdf_to_png_tool.py b/pdf_to_png_tool.py
 from tkkit import *
 from shutil import copytree
 import fitz
 from PIL import Image
 import re
 import os


 # Application object; add_log() and process_files() read and write the form
 # fields through its get_value()/set_value() methods.
 app = TKApp('My App')

 def add_log(text):
     """Append *text* on a new line to the current value of the "log" field."""
     updated_log = app.get_value("log") + '\n' + text
     app.set_value("log", updated_log)

 def process_files():
     """Convert every matching PDF below the input folder into PNG page images.

     Reads "input_dir", "output_dir", "filename_pattern", "is_cut_half" and
     "keep_folder_structure" from the form and reports progress through
     add_log().

     Images are numbered per source directory. Without
     "keep_folder_structure" everything is written to the output folder and
     each name is prefixed with a 4-digit running directory number; with it,
     images are written to the directory of the copytree destination path.
     With "is_cut_half", pages whose width/height ratio exceeds 0.7 are
     rendered as two images: the left half with suffix "B", and the right
     half with suffix "A" and the next number.

     Only ".pdf" entries are rendered; the image extensions are collected and
     logged but produce no output.
     """
     filename_pattern = app.get_value("filename_pattern")
     is_cut_half = app.get_value("is_cut_half")
     keep_folder_structure = app.get_value("keep_folder_structure")

     add_log('开始处理')

     # Compile the optional filter once, up front: an invalid expression is
     # reported in the log instead of raising from inside copytree().
     matcher = None
     if filename_pattern:
         try:
             matcher = re.compile(filename_pattern)
         except re.error as err:
             add_log(f'文件名匹配规则无效: {err}')
             return

     # Build the file list. copytree() is convenient for walking the tree, but
     # it gives no ordering guarantee, so the entries are collected first and
     # sorted afterwards. add_file() stands in for the copy function, so no
     # file is actually copied.
     supported_suffixes = ('.png', '.tif', '.tiff', '.jpg', '.jpeg', '.pdf')
     files_to_process = []  # (input_dir, input_basename, output_file)

     def add_file(source_file, target_file):
         # Compare case-insensitively so that e.g. "SCAN.PDF" is not skipped.
         if not source_file.lower().endswith(supported_suffixes):
             return
         if matcher is None or matcher.search(source_file) is not None:
             files_to_process.append((os.path.dirname(source_file), os.path.basename(source_file), target_file))

     copytree(app.get_value("input_dir"), app.get_value("output_dir"), copy_function=add_file, dirs_exist_ok=True)
     files_to_process.sort(key=lambda entry: (entry[0], entry[1]))

     output_folder = app.get_value("output_dir")
     last_dir = None  # batch runs span several folders; remember the previous one
     volume_index = 0
     filename_prefix = ''
     file_index = 1
     # Render at twice the default resolution in both directions.
     mat = fitz.Matrix(2.0, 2.0)

     for input_dir, input_basename, output_file in files_to_process:
         add_log(f'处理 {input_dir} {input_basename}')
         if keep_folder_structure:
             output_folder = os.path.dirname(output_file)
         if last_dir != input_dir:
             # Entering a different directory: restart the numbering.
             file_index = 1
             volume_index += 1
             if not keep_folder_structure:
                 filename_prefix = '{:04}_'.format(volume_index)
             last_dir = input_dir

         # Every entry consumes at least one number, exactly as before.
         numbers_used = 1
         if input_basename.lower().endswith('.pdf'):
             # The context manager closes the document even if rendering fails.
             with fitz.open(os.path.join(input_dir, input_basename)) as doc:
                 for i in range(doc.page_count):
                     page = doc[i]
                     # Page width and height.
                     page_rect = page.bound()
                     width = page_rect.width
                     height = page_rect.height

                     if is_cut_half and width / height > 0.7:
                         # Wide page: render the two halves through clip
                         # rectangles. Unlike set_cropbox() this leaves the
                         # page untouched and uses the same coordinate system
                         # as page.bound().
                         left_rect = fitz.Rect(0, 0, width/2, height)
                         right_rect = fitz.Rect(width/2, 0, width, height)
                         # NOTE(review): the left half keeps this number ("B"),
                         # the right half takes the next one ("A") --
                         # presumably a leaf-numbering convention; confirm.
                         page.get_pixmap(matrix=mat, clip=left_rect).pil_save(os.path.join(output_folder, "{}{:04}B.png".format(filename_prefix, file_index+i)))
                         page.get_pixmap(matrix=mat, clip=right_rect).pil_save(os.path.join(output_folder, "{}{:04}A.png".format(filename_prefix, file_index+i+1)))
                     else:
                         # Splitting is off or the page is narrow (ratio <= 0.7):
                         # save the whole page.
                         page.get_pixmap(matrix=mat).pil_save(os.path.join(output_folder, "{}{:04}.png".format(filename_prefix, file_index+i+1)))
                 # Advance past every page of this document so the next PDF in
                 # the same directory cannot overwrite these images.
                 numbers_used = max(doc.page_count, 1)
         file_index += numbers_used




 # Declarative form layout. Each `name` below is the key that add_log() and
 # process_files() pass to app.get_value()/app.set_value().
 window = Window([
        Column([
            # Directory pickers; the labels read "choose the PDF folder" and
            # "choose the output folder".
            FilePicker("选择pdf文件夹", dir=True, name="input_dir"),
            FilePicker("选择输出文件夹", dir=True, name="output_dir"),
            Row([
                # Label reads "only process files whose name matches the rule
                # (optional)"; process_files() uses the value with re.search.
                Label('只处理文件名匹配规则的文件(可不填)'),
                TextBox('', name="filename_pattern"),
            ]),
            
            # "Cut pages in half down the middle".
            CheckBox('从中间切半页', name="is_cut_half"),
            # "Keep the original folder structure".
            CheckBox('保持原有的文件夹结构', name="keep_folder_structure"),
            # "Convert": clicking runs process_files.
            Button('转换', on_click=process_files),
            # Text box that add_log() appends to.
            TextBox(name="log", lines=10)
        ], padding=8)
    
 ])

 # Hand the layout to the app, then start it (presumably run() enters the GUI
 # main loop -- confirm against tkkit).
 app.show(window)
 app.run()
	from tkkit import *
	from shutil import copytree
	import fitz
	from PIL import Image
	import re
	import os


	# Application object; add_log() and process_files() read and write the form
	# fields through its get_value()/set_value() methods.
	app = TKApp('My App')

	def add_log(text):
	    """Append *text* on a new line to the current value of the "log" field."""
	    # The body is indented again: in this copy it sat at the same column as
	    # the `def`, which is a syntax error.
	    app.set_value("log", app.get_value("log") + '\n' + text)

	def process_files():
	    """Convert every matching PDF below the input folder into PNG page images.

	    Reads "input_dir", "output_dir", "filename_pattern", "is_cut_half" and
	    "keep_folder_structure" from the form and reports progress through
	    add_log().

	    Images are numbered per source directory. Without
	    "keep_folder_structure" everything is written to the output folder and
	    each name is prefixed with a 4-digit running directory number; with it,
	    images are written to the directory of the copytree destination path.
	    With "is_cut_half", pages whose width/height ratio exceeds 0.7 are
	    rendered as two images: the left half with suffix "B", and the right
	    half with suffix "A" and the next number.

	    Only ".pdf" entries are rendered; the image extensions are collected and
	    logged but produce no output.
	    """
	    filename_pattern = app.get_value("filename_pattern")
	    is_cut_half = app.get_value("is_cut_half")
	    keep_folder_structure = app.get_value("keep_folder_structure")

	    add_log('开始处理')

	    # Compile the optional filter once, up front: an invalid expression is
	    # reported in the log instead of raising from inside copytree().
	    matcher = None
	    if filename_pattern:
	        try:
	            matcher = re.compile(filename_pattern)
	        except re.error as err:
	            add_log(f'文件名匹配规则无效: {err}')
	            return

	    # Build the file list. copytree() is convenient for walking the tree, but
	    # it gives no ordering guarantee, so the entries are collected first and
	    # sorted afterwards. add_file() stands in for the copy function, so no
	    # file is actually copied.
	    supported_suffixes = ('.png', '.tif', '.tiff', '.jpg', '.jpeg', '.pdf')
	    files_to_process = []  # (input_dir, input_basename, output_file)

	    def add_file(source_file, target_file):
	        # Compare case-insensitively so that e.g. "SCAN.PDF" is not skipped.
	        if not source_file.lower().endswith(supported_suffixes):
	            return
	        if matcher is None or matcher.search(source_file) is not None:
	            files_to_process.append((os.path.dirname(source_file), os.path.basename(source_file), target_file))

	    copytree(app.get_value("input_dir"), app.get_value("output_dir"), copy_function=add_file, dirs_exist_ok=True)
	    files_to_process.sort(key=lambda entry: (entry[0], entry[1]))

	    output_folder = app.get_value("output_dir")
	    last_dir = None  # batch runs span several folders; remember the previous one
	    volume_index = 0
	    filename_prefix = ''
	    file_index = 1
	    # Render at twice the default resolution in both directions.
	    mat = fitz.Matrix(2.0, 2.0)

	    for input_dir, input_basename, output_file in files_to_process:
	        add_log(f'处理 {input_dir} {input_basename}')
	        if keep_folder_structure:
	            output_folder = os.path.dirname(output_file)
	        if last_dir != input_dir:
	            # Entering a different directory: restart the numbering.
	            file_index = 1
	            volume_index += 1
	            if not keep_folder_structure:
	                filename_prefix = '{:04}_'.format(volume_index)
	            last_dir = input_dir

	        # Every entry consumes at least one number, exactly as before.
	        numbers_used = 1
	        if input_basename.lower().endswith('.pdf'):
	            # The context manager closes the document even if rendering fails.
	            with fitz.open(os.path.join(input_dir, input_basename)) as doc:
	                for i in range(doc.page_count):
	                    page = doc[i]
	                    # Page width and height.
	                    page_rect = page.bound()
	                    width = page_rect.width
	                    height = page_rect.height

	                    if is_cut_half and width / height > 0.7:
	                        # Wide page: render the two halves through clip
	                        # rectangles. Unlike set_cropbox() this leaves the
	                        # page untouched and uses the same coordinate system
	                        # as page.bound().
	                        left_rect = fitz.Rect(0, 0, width/2, height)
	                        right_rect = fitz.Rect(width/2, 0, width, height)
	                        # NOTE(review): the left half keeps this number ("B"),
	                        # the right half takes the next one ("A") --
	                        # presumably a leaf-numbering convention; confirm.
	                        page.get_pixmap(matrix=mat, clip=left_rect).pil_save(os.path.join(output_folder, "{}{:04}B.png".format(filename_prefix, file_index+i)))
	                        page.get_pixmap(matrix=mat, clip=right_rect).pil_save(os.path.join(output_folder, "{}{:04}A.png".format(filename_prefix, file_index+i+1)))
	                    else:
	                        # Splitting is off or the page is narrow (ratio <= 0.7):
	                        # save the whole page.
	                        page.get_pixmap(matrix=mat).pil_save(os.path.join(output_folder, "{}{:04}.png".format(filename_prefix, file_index+i+1)))
	                # Advance past every page of this document so the next PDF in
	                # the same directory cannot overwrite these images.
	                numbers_used = max(doc.page_count, 1)
	        file_index += numbers_used




	# Declarative form layout. Each `name` below is the key that add_log() and
	# process_files() pass to app.get_value()/app.set_value().
	window = Window([
	    Column([
	        # Directory pickers; the labels read "choose the PDF folder" and
	        # "choose the output folder".
	        FilePicker("选择pdf文件夹", dir=True, name="input_dir"),
	        FilePicker("选择输出文件夹", dir=True, name="output_dir"),
	        Row([
	            # Label reads "only process files whose name matches the rule
	            # (optional)"; process_files() uses the value with re.search.
	            Label('只处理文件名匹配规则的文件(可不填)'),
	            TextBox('', name="filename_pattern"),
	        ]),

	        # "Cut pages in half down the middle".
	        CheckBox('从中间切半页', name="is_cut_half"),
	        # "Keep the original folder structure".
	        CheckBox('保持原有的文件夹结构', name="keep_folder_structure"),
	        # "Convert": clicking runs process_files.
	        Button('转换', on_click=process_files),
	        # Text box that add_log() appends to.
	        TextBox(name="log", lines=10)
	    ], padding=8)
	])

	# Hand the layout to the app, then start it (presumably run() enters the GUI
	# main loop -- confirm against tkkit).
	app.show(window)
	app.run()