Last active
July 30, 2022 07:50
-
-
Save ahmedbr/04b962f3f88c2366b9c93868954c5c82 to your computer and use it in GitHub Desktop.
Convert a textual PDF file to an audio book using Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import PyPDF3 | |
import pyttsx3 | |
import pdfplumber | |
import os | |
# Convert a text-based PDF into a spoken-audio file.
#
# The script extracts the text layer of every page with pdfplumber and hands
# the combined text to the pyttsx3 text-to-speech engine, which writes the
# audio next to the input PDF (same base name, ".mp3" extension).

pdf_file_path = r"path_to_pdf_file.pdf"
root, ext = os.path.splitext(pdf_file_path)
output_audio_file = root + ".mp3"
# NOTE(review): the audio encoding is presumably chosen by the pyttsx3 driver
# rather than by the file extension -- confirm the output really is MP3 on the
# target platform.

# extract text from the textual file
# pdfplumber already exposes every page, so the PDF is opened once, inside a
# context manager that closes it again, instead of also being opened with a
# second library just to count pages.
with pdfplumber.open(pdf_file_path) as pdf:
    pages_num = len(pdf.pages)
    # extract_text() may yield None for a page without a text layer
    # (e.g. a scanned image); treat that as empty text instead of crashing.
    page_texts = [page.extract_text() or "" for page in pdf.pages]

# Join with a newline so the last word of one page is not fused with the
# first word of the next page when spoken.
extracted_text = "\n".join(page_texts)

# tts part
engine = pyttsx3.init()
# save_to_file only queues the job; runAndWait() processes the queue and
# blocks until the audio has been written.
engine.save_to_file(extracted_text, output_audio_file)
engine.runAndWait()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment