DxPoly · December 5, 2024 10:19 · DxPoly · Dec 5, 2024
diff --git a/epub_processer.py b/epub_processer.py
 import re
 import zipfile
 import tempfile
 import os
 import shutil
 from pathlib import Path

 def process_html_content(content):
     """
     Wrap each <table class="processedcode"> element of *content* in <pre> tags.

     A table is matched from its opening tag (the class attribute has to come
     first and be spelled exactly as above) up to the nearest following
     </table>. Text outside the matched tables is returned untouched.
     """
     code_table = re.compile(r'(<table class="processedcode"[^>]*>[\s\S]*?</table>)')
     # \1 re-inserts the matched table verbatim between the new tags.
     return code_table.sub(r'<pre>\1</pre>', content)

 def process_epub(epub_path):
     """
     Wrap the code tables of every HTML/XHTML file in an EPUB and save a copy.

     The archive is rebuilt entry by entry, in the original order: members
     whose name ends in ``.html`` or ``.xhtml`` are decoded as UTF-8, passed
     through ``process_html_content`` and re-encoded; every other member is
     copied unchanged. Each entry is written with its original ``ZipInfo``.

     The new archive is assembled in a temporary directory and only moved
     next to the input (as ``<stem>_processed<suffix>``) once every entry has
     been written, so a failure part-way through does not leave a truncated
     output file behind.

     Args:
         epub_path: Path of the EPUB file to process.

     Returns:
         The path of the processed EPUB, as a string.

     Raises:
         UnicodeDecodeError: If an HTML/XHTML member is not valid UTF-8.
     """
     source = Path(epub_path)
     output_path = str(source.with_stem(source.stem + '_processed'))

     # Scratch directory that holds the archive while it is being built
     temp_dir = tempfile.mkdtemp()
     temp_epub = os.path.join(temp_dir, 'temp.epub')

     try:
         with zipfile.ZipFile(epub_path, 'r') as zip_ref, \
                 zipfile.ZipFile(temp_epub, 'w') as zip_out:
             for item in zip_ref.infolist():
                 # Read through the ZipInfo so duplicate names are not conflated
                 data = zip_ref.read(item)

                 # Only HTML documents are rewritten; the rest is copied as-is
                 if item.filename.endswith(('.html', '.xhtml')):
                     content = data.decode('utf-8')
                     data = process_html_content(content).encode('utf-8')

                 zip_out.writestr(item, data)

         # Publish the result only after the archive is complete and closed
         shutil.move(temp_epub, output_path)

         print(f"Processing complete. Output file: {output_path}")
         return output_path

     finally:
         # Clean up temporary files
         shutil.rmtree(temp_dir)

 if __name__ == "__main__":
     import sys

     # Command-line entry point: exactly one argument, the EPUB to process.
     if len(sys.argv) == 2:
         epub_path = sys.argv[1]
         process_epub(epub_path)
     else:
         print("Usage: python script.py <epub_file_path>")
         sys.exit(1)
	import re
	import zipfile
	import tempfile
	import os
	import shutil
	from pathlib import Path

	def process_html_content(content):
	    """
	    Add <pre> tags around <table class="processedcode"> elements.

	    Each table is matched from its opening tag, including any further
	    attributes, up to the nearest following </table>, and the whole
	    element is wrapped in <pre>...</pre>. Other text is returned unchanged.
	    """
	    # "[^>]*" allows any number of extra attribute characters and "[\s\S]*?"
	    # lazily spans the table body, newlines included. Without these
	    # quantifiers the pattern cannot match a real table.
	    pattern = r'(<table class="processedcode"[^>]*>[\s\S]*?</table>)'

	    def replacer(match):
	        return f'<pre>{match.group(1)}</pre>'

	    return re.sub(pattern, replacer, content)

	def process_epub(epub_path):
	    """
	    Process all HTML files in the EPUB file and write a processed copy.

	    Members whose name ends in ``.html`` or ``.xhtml`` are decoded as
	    UTF-8, passed through ``process_html_content`` and re-encoded; every
	    other member is copied unchanged. Entries are written in the original
	    order, each with its original ``ZipInfo``.

	    The new archive is built in a temporary directory and moved next to
	    the input (as ``<stem>_processed<suffix>``) only after every entry has
	    been written, so a failure part-way through does not leave a truncated
	    output file behind.

	    Args:
	        epub_path: Path of the EPUB file to process.

	    Returns:
	        The path of the processed EPUB, as a string.

	    Raises:
	        UnicodeDecodeError: If an HTML/XHTML member is not valid UTF-8.
	    """
	    # Create output filename
	    original = Path(epub_path)
	    output_path = str(original.with_stem(original.stem + '_processed'))

	    # Create temporary directory that holds the archive while it is built
	    temp_dir = tempfile.mkdtemp()
	    temp_epub = os.path.join(temp_dir, 'temp.epub')

	    try:
	        with zipfile.ZipFile(epub_path, 'r') as zip_ref, \
	                zipfile.ZipFile(temp_epub, 'w') as zip_out:
	            # Iterate through all files
	            for item in zip_ref.infolist():
	                # Read through the ZipInfo so duplicate names are not conflated
	                data = zip_ref.read(item)

	                # Process HTML files; other files are copied directly
	                if item.filename.endswith(('.html', '.xhtml')):
	                    content = data.decode('utf-8')
	                    data = process_html_content(content).encode('utf-8')

	                zip_out.writestr(item, data)

	        # Publish the result only after the archive is complete and closed
	        shutil.move(temp_epub, output_path)

	        print(f"Processing complete. Output file: {output_path}")
	        return output_path

	    finally:
	        # Clean up temporary files
	        shutil.rmtree(temp_dir)

	if __name__ == "__main__":
	    import sys

	    # Command-line entry point: expects exactly one argument, the EPUB path.
	    if len(sys.argv) != 2:
	        print("Usage: python script.py <epub_file_path>")
	        sys.exit(1)

	    epub_path = sys.argv[1]
	    process_epub(epub_path)