Asif-Iqbal-Bhatti · November 29, 2024 15:07
diff --git a/get_text_from_screenshot.py b/get_text_from_screenshot.py
 from PIL import Image
 import pytesseract
 from fpdf import FPDF


 def extract_and_clean_text(image_path):
     """Run OCR on an image and return its text as reflowed paragraphs.

     Args:
         image_path: Path of the image file to read.

     Returns:
         The recognised text, with each paragraph collapsed onto a single
         line and paragraphs separated by one blank line.
     """
     # Use a context manager so the underlying image file is closed as soon
     # as OCR has finished instead of lingering until garbage collection.
     with Image.open(image_path) as image:
         raw_text = pytesseract.image_to_string(image)

     # A blank line marks a paragraph break; every other line break is just
     # the visual wrapping of the scanned page, so fold it into a space.
     # str.split() with no argument also trims the chunk and collapses runs
     # of whitespace, which covers stray newlines left by 3+ line breaks.
     paragraphs = (" ".join(chunk.split()) for chunk in raw_text.split("\n\n"))

     # Skip chunks that turned out empty so consecutive blank lines do not
     # produce empty paragraphs in the result.
     return "\n\n".join(paragraph for paragraph in paragraphs if paragraph)

 def save_text_to_pdf(text, pdf_path):
     """Write text to an A4 PDF with book-like, justified formatting.

     Args:
         text: The text to typeset; "\n" characters start new lines.
         pdf_path: Destination path of the generated PDF file.
     """
     margin_mm = 20

     pdf = FPDF(format='A4')
     pdf.set_margins(margin_mm, margin_mm, margin_mm)
     # set_margins() only covers left/top/right. The bottom margin comes from
     # set_auto_page_break(), whose margin argument defaults to 0, so it has
     # to be passed explicitly or the text runs to the very edge of the page.
     pdf.set_auto_page_break(auto=True, margin=margin_mm)
     pdf.add_page()
     pdf.set_font("Times", size=10)

     # The built-in "Times" font can only encode Latin-1. OCR output commonly
     # contains typographic quotes and dashes, so map those to plain
     # equivalents and replace anything else unsupported with "?" rather
     # than failing while writing the document.
     typographic_map = str.maketrans({
         "\u2018": "'",
         "\u2019": "'",
         "\u201c": '"',
         "\u201d": '"',
         "\u2013": "-",
         "\u2014": "-",
         "\u2026": "...",
     })
     printable_text = (
         text.translate(typographic_map)
         .encode("latin-1", errors="replace")
         .decode("latin-1")
     )

     # Width 0 stretches the cell to the right margin; 5 is the line height.
     pdf.multi_cell(0, 5, printable_text, align="J")
     pdf.output(pdf_path)

 # Input screenshot and destination PDF for this run
 image_path = '5922636369790880076.jpg'
 pdf_path = 'output_book_style.pdf'

 # OCR the screenshot, then typeset the recovered text into the PDF
 text = extract_and_clean_text(image_path)
 save_text_to_pdf(text, pdf_path)

 print(f"PDF saved to {pdf_path}")
	from PIL import Image
	import pytesseract
	from fpdf import FPDF


	def extract_and_clean_text(image_path):
	    """Run OCR on an image and return its text as reflowed paragraphs.

	    Args:
	        image_path: Path of the image file to read.

	    Returns:
	        The recognised text, with each paragraph collapsed onto a single
	        line and paragraphs separated by one blank line.
	    """
	    # Use a context manager so the underlying image file is closed as soon
	    # as OCR has finished instead of lingering until garbage collection.
	    with Image.open(image_path) as image:
	        raw_text = pytesseract.image_to_string(image)

	    # A blank line marks a paragraph break; every other line break is just
	    # the visual wrapping of the scanned page, so fold it into a space.
	    # str.split() with no argument also trims the chunk and collapses runs
	    # of whitespace, which covers stray newlines left by 3+ line breaks.
	    paragraphs = (" ".join(chunk.split()) for chunk in raw_text.split("\n\n"))

	    # Skip chunks that turned out empty so consecutive blank lines do not
	    # produce empty paragraphs in the result.
	    return "\n\n".join(paragraph for paragraph in paragraphs if paragraph)

	def save_text_to_pdf(text, pdf_path):
	    """Write text to an A4 PDF with book-like, justified formatting.

	    Args:
	        text: The text to typeset; "\n" characters start new lines.
	        pdf_path: Destination path of the generated PDF file.
	    """
	    margin_mm = 20

	    pdf = FPDF(format='A4')
	    pdf.set_margins(margin_mm, margin_mm, margin_mm)
	    # set_margins() only covers left/top/right. The bottom margin comes from
	    # set_auto_page_break(), whose margin argument defaults to 0, so it has
	    # to be passed explicitly or the text runs to the very edge of the page.
	    pdf.set_auto_page_break(auto=True, margin=margin_mm)
	    pdf.add_page()
	    pdf.set_font("Times", size=10)

	    # The built-in "Times" font can only encode Latin-1. OCR output commonly
	    # contains typographic quotes and dashes, so map those to plain
	    # equivalents and replace anything else unsupported with "?" rather
	    # than failing while writing the document.
	    typographic_map = str.maketrans({
	        "\u2018": "'",
	        "\u2019": "'",
	        "\u201c": '"',
	        "\u201d": '"',
	        "\u2013": "-",
	        "\u2014": "-",
	        "\u2026": "...",
	    })
	    printable_text = (
	        text.translate(typographic_map)
	        .encode("latin-1", errors="replace")
	        .decode("latin-1")
	    )

	    # Width 0 stretches the cell to the right margin; 5 is the line height.
	    pdf.multi_cell(0, 5, printable_text, align="J")
	    pdf.output(pdf_path)

	# Input screenshot and destination PDF for this run
	image_path = '5922636369790880076.jpg'
	pdf_path = 'output_book_style.pdf'

	# OCR the screenshot, then typeset the recovered text into the PDF
	text = extract_and_clean_text(image_path)
	save_text_to_pdf(text, pdf_path)

	print(f"PDF saved to {pdf_path}")