jlmalone · April 17, 2024 15:23
diff --git a/TextExtract.py b/TextExtract.py
 # pip install ebooklib beautifulsoup4 python-dotenv
 import ebooklib
 from dotenv import load_dotenv
 import os
 from ebooklib import epub
 from bs4 import BeautifulSoup


 def extract_text_from_epub(file_path):
    """Return the text of every document item in the EPUB at *file_path*.

    The book is loaded with ``epub.read_epub``. Each item whose type is
    ``ebooklib.ITEM_DOCUMENT`` is parsed with BeautifulSoup's
    ``html.parser`` and reduced to its text via ``get_text()``. The
    per-item texts are joined with two newlines, in the order
    ``get_items()`` yields them.
    """
    sections = [
        BeautifulSoup(doc.content, 'html.parser').get_text()
        for doc in epub.read_epub(file_path).get_items()
        if doc.get_type() == ebooklib.ITEM_DOCUMENT
    ]
    return "\n\n".join(sections)


 # Load variables from a .env file into the environment before reading them.
 load_dotenv()
 # EPUB_PATH names the EPUB file to extract; os.getenv returns None when unset.
 epub_path = os.getenv("EPUB_PATH")
 if not epub_path:
    # Stop with a clear message rather than passing None (or an empty
    # string) to epub.read_epub, which fails with an unrelated-looking error.
    raise SystemExit(
        "EPUB_PATH is not set; define it in the environment or in a .env file"
    )
 text = extract_text_from_epub(epub_path)
 print(text)
 # TODO decide where text should be saved or whatever
	# pip install ebooklib beautifulsoup4 python-dotenv
	import ebooklib
	from dotenv import load_dotenv
	import os
	from ebooklib import epub
	from bs4 import BeautifulSoup


	def extract_text_from_epub(file_path):
	    """Return the text of every document item in the EPUB at *file_path*.

	    The book is read with ``epub.read_epub``; each item of type
	    ``ebooklib.ITEM_DOCUMENT`` is parsed with BeautifulSoup's
	    ``html.parser`` and its text collected. The collected texts are
	    joined with two newlines between items.
	    """
	    book = epub.read_epub(file_path)
	    text_content = []

	    for item in book.get_items():
	        # Only ITEM_DOCUMENT items are parsed; every other item type is skipped.
	        if item.get_type() == ebooklib.ITEM_DOCUMENT:
	            soup = BeautifulSoup(item.content, 'html.parser')
	            text_content.append(soup.get_text())

	    return "\n\n".join(text_content)


	# Load variables from a .env file into the environment before reading them.
	load_dotenv()
	# EPUB_PATH names the EPUB file to extract; os.getenv returns None when unset.
	epub_path = os.getenv("EPUB_PATH")
	if not epub_path:
	    # Stop with a clear message rather than passing None (or an empty
	    # string) to epub.read_epub, which fails with an unrelated-looking error.
	    raise SystemExit(
	        "EPUB_PATH is not set; define it in the environment or in a .env file"
	    )
	text = extract_text_from_epub(epub_path)
	print(text)
	# TODO decide where text should be saved or whatever