Last active
September 26, 2023 02:28
-
-
Save anotherdirtbag/7edf6780c962f9b09b929b59ad8501c2 to your computer and use it in GitHub Desktop.
A Python 3 script that reads the text of an EPUB file using either an online (Google WaveNet) or an offline (SAPI) TTS engine and saves the speech as a series of Opus files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import zipfile | |
import time | |
import re | |
import urllib3 | |
from dataclasses import dataclass | |
from bs4 import BeautifulSoup | |
#pip3 install --user --upgrade beautifulsoup4 | |
# Chunk paths (without extension) that savetts failed to convert.
didntconvert = set()

# Which text-to-speech backend to use: 'sapi', 'google', or 'pyttsx3'.
ttsengine = 'google'
# for sapi: pip3 install --user --upgrade git+https://github.com/DeepHorizons/tts
# for google: pip3 install --user --upgrade google-cloud-texttospeech google-cloud-storage
#   you'll also need to make a Google Cloud account and install the Cloud SDK https://cloud.google.com/sdk/
#   and occasionally run: gcloud components update
# for pyttsx3: pip3 install --user --upgrade pyttsx3 pypiwin32

# All input and output paths are resolved against the working directory.
currentdir = os.getcwd()
epub_doc = os.path.join(currentdir, 'The Strongest System - Xin Feng.epub')
credentialsfile = os.path.join(currentdir, 'MY_SECRET_CREDENTIALS_FILE.json')
tempaudio = os.path.join(currentdir, 'tempaudiofile')
# Output folder is named "<epub name without extension> (<engine>)".
outputdirectory = os.path.join(currentdir, os.path.basename(epub_doc)[:-len('.epub')] + ' (' + ttsengine + ')')
# exist_ok avoids the check-then-create race of "if not exists: mkdir".
os.makedirs(outputdirectory, exist_ok=True)

if ttsengine == 'sapi':
    import tts.sapi
    sapivoice = tts.sapi.Sapi()
    sapivoice.set_rate(1)
    sapivoice.set_voice(sapivoice.get_voices()[1])
elif ttsengine == 'google':
    # pip3 install --user --upgrade google-cloud-texttospeech
    from google.cloud import texttospeech
    from google.oauth2 import service_account
    # Build the credentials object from the configured service-account key
    # file; the client below was previously handed an undefined name.
    credentials = service_account.Credentials.from_service_account_file(credentialsfile)
    client = texttospeech.TextToSpeechClient(credentials=credentials)
    #print(str(client.list_voices()))
    voice = texttospeech.VoiceSelectionParams(
        language_code='en-US',
        name='en-US-Wavenet-C')
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.OGG_OPUS,
        effects_profile_id=['large-home-entertainment-class-device'])
    # savetts stops the script once totalcharacters exceeds this value
    # (a value of 0 or less disables the check).
    charactermontlyquota = 999900
    totalcharacters = 0
    # Upper bound (exclusive) on the length of one synthesis request.
    # NOTE(review): presumably chosen to stay under the API's per-request limit - confirm.
    charactersperrequest = 4995
    # State for wait_on_requestsperminute: at most requestsperminute
    # requests are issued per 60-second window starting at requeststart.
    requestsperminute = 300
    requestcounter = 0
    requeststart = time.time()
elif ttsengine == 'pyttsx3':
    import pyttsx3
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[1].id)
def parsechaptername(soup):
    """Return a filename-safe chapter title built from the <title> elements of soup.

    The text of every element matched by the "title" selector is stripped and
    concatenated, then every character that is not a word character, a digit,
    a space or a hyphen is removed.

    get_text() is used instead of .string so that an empty title, or one with
    nested markup, contributes its real text rather than the literal "None".
    """
    chaptertitle = ''.join(line.get_text().strip() for line in soup.select("title"))
    return re.sub(r'[^\w\d\ \-]', '', chaptertitle)
def parseepubtext(soup):
    """Return the text of every <p> element in soup, one stripped paragraph per line.

    After the paragraphs are joined, any digits-only line enclosed by newlines
    (a page number) is collapsed into a single newline.
    """
    paragraphs = (str(tag.get_text()).strip() for tag in soup.select('p'))
    joined = ''.join(paragraph + '\n' for paragraph in paragraphs)
    # remove page numbers
    return re.sub(r'\n\d+\n', '\n', joined)
# NOTE: this parsing worked for the epub file I had, but the formatting is likely different for other books.
#google tts only | |
def dosplitrequests(chaptertext, maxchars=None):
    """Split chaptertext into request-sized chunks that break only after a period.

    Args:
        chaptertext: the full text to split.
        maxchars: exclusive upper bound on the length of each chunk. When
            omitted, the module-level charactersperrequest is used.

    Returns:
        A list of strings, each shorter than maxchars. Text shorter than
        maxchars is returned unchanged as a single chunk. A trailing fragment
        that contains only whitespace is dropped; otherwise the chunks
        concatenate back to chaptertext.

    Exits the script with status 1 if a single sentence cannot fit in a chunk.
    """
    if maxchars is None:
        maxchars = charactersperrequest
    if len(chaptertext) < maxchars:
        return [chaptertext]

    splitchar = '.'
    pieces = chaptertext.split(splitchar)
    # Every piece except the last was followed by a period in the source
    # text; re-attach it. The last piece gets none, so no extra "." is
    # invented at the end of the chapter.
    sentences = [piece + splitchar for piece in pieces[:-1]]
    if pieces[-1].strip():
        sentences.append(pieces[-1])

    splitrequests = []
    thisrequest = ''
    for sentence in sentences:
        # Checked with the period included, so a chunk can never reach maxchars.
        if len(sentence) >= maxchars:
            print('single lines >' + str(maxchars) + ' characters not supported. try spliting by spaces instead')
            sys.exit(1)
        if len(thisrequest) + len(sentence) < maxchars:
            thisrequest += sentence
        else:
            splitrequests.append(thisrequest)
            thisrequest = sentence
    if thisrequest:
        splitrequests.append(thisrequest)
    return splitrequests
#google tts only | |
def wait_on_requestsperminute():
    """Block until another request may be issued, then count that request.

    If more than 60 seconds have passed since requeststart, a new window is
    started with this request as its first. Otherwise the request is counted
    against the current window, sleeping in 10-second steps while the window
    already holds requestsperminute requests.
    """
    global requestcounter, requeststart
    while True:
        current = time.time()
        if (current - requeststart) > 60:
            # Window expired: restart it and count this request as number one.
            requeststart = current
            requestcounter = 1
            return
        if requestcounter < requestsperminute:
            requestcounter += 1
            return
        print('wait 10')
        time.sleep(10)
# source and dst should be full paths without any double-quotes
def ffmpeg(source, dst):
    """Re-encode the audio of source into a mono 32 kbit/s Opus file at dst.

    The ffmpeg binary is looked up in <currentdir>/ffmpeg/bin ("ffmpeg.exe" on
    Windows, "ffmpeg" elsewhere). The script exits with status 1 if it is
    missing. An existing dst is overwritten (-y).

    Args:
        source: full path of the input audio file.
        dst: full path of the output file.
    """
    # Local import so this function does not rely on the file's import block.
    import subprocess

    # Join the components separately: a literal 'ffmpeg\bin' is a single
    # (non-existent) directory name on non-Windows systems.
    ffmpegpath = os.path.join(currentdir, 'ffmpeg', 'bin')
    exename = 'ffmpeg.exe' if sys.platform == 'win32' else 'ffmpeg'
    executable = os.path.join(ffmpegpath, exename)
    if not os.path.exists(executable):
        print(executable + ' not found')
        sys.exit(1)

    # An argument list (no shell) keeps paths with spaces, quotes or shell
    # metacharacters intact.
    command = [
        executable,
        '-i', source,
        '-map', '0:a',
        # an alternate if libopus isn't available:
        # '-c:a', 'libvorbis', '-aq', '2', '-ac', '1', '-f', 'ogg',
        '-c:a', 'libopus', '-ac', '1', '-b:a', '32k',
        '-application', 'voip', '-vbr', 'on', '-compression_level', '10',
        '-f', 'opus',
        '-y', dst,
    ]
    print(' '.join(command))
    # cwd mirrors the original "cd <ffmpegpath> && ffmpeg ..." invocation.
    result = subprocess.run(command, cwd=ffmpegpath)
    if result.returncode != 0:
        # Best effort, as before: report the failure but keep going.
        print('ffmpeg exited with status ' + str(result.returncode))
def savetts(chaptertext, audiofile, secondpass = False):
    """Convert chaptertext to speech with the engine selected by ttsengine.

    Args:
        chaptertext: the text to speak.
        audiofile: full path of the target file, ending in '.opus'.
        secondpass: google engine only. When False, a chunk is requested only
            if its output file does not exist yet. When True, only chunks
            recorded in didntconvert are requested again.

    Engine behaviour:
        sapi    - records to a temporary .wav and re-encodes it to audiofile
                  with ffmpeg.
        google  - splits the text with dosplitrequests and writes one .opus
                  file per chunk ("<name>.<index>.opus" when there are several
                  chunks). Exits the script once the character quota is
                  exceeded.
        pyttsx3 - speaks the text; nothing is written to audiofile.
    """
    global totalcharacters

    if ttsengine == 'sapi':
        sapivoice.create_recording(tempaudio + '.wav', chaptertext)
        ffmpeg(tempaudio + '.wav', audiofile)
    elif ttsengine == 'google':
        # The quota counters are only defined for the google engine, so the
        # accounting lives in this branch; the other engines have no quota.
        totalcharacters += len(chaptertext)
        print('totalcharacters ' + str(totalcharacters), end='\t\t\t\r')
        if totalcharacters > charactermontlyquota and charactermontlyquota > 0:
            print('reached character limit')
            print(os.path.basename(audiofile))
            sys.exit()

        splitrequests = dosplitrequests(chaptertext)
        zfillen = len(str(len(splitrequests)))
        audiofilenoext = audiofile[:-len('.opus')]
        for reqindex, req in enumerate(splitrequests, start=1):
            if len(splitrequests) > 1:
                thistempaudio = audiofilenoext + '.' + str(reqindex).zfill(zfillen)
            else:
                thistempaudio = audiofilenoext

            # didntconvert stores the same path that is looked up here, so a
            # second pass can actually find the chunks that failed.
            if secondpass:
                needsrequest = thistempaudio in didntconvert
            else:
                needsrequest = not os.path.exists(thistempaudio + '.opus')
            if not needsrequest:
                continue

            wait_on_requestsperminute()
            input_text = texttospeech.SynthesisInput(text=req)
            response = client.synthesize_speech(input=input_text, voice=voice, audio_config=audio_config, retry=None)
            if response:
                # The response's audio_content is binary.
                with open(thistempaudio + '.opus', 'wb') as out:
                    out.write(response.audio_content)
                didntconvert.discard(thistempaudio)
                print(os.path.basename(thistempaudio + '.opus'), end='\t\t\t\t')
            else:
                print('error converting ' + thistempaudio, end='\t\t\t\t')
                didntconvert.add(thistempaudio)
            time.sleep(30) #otherwise almost 1/2 of the files use a lower quality voice
    elif ttsengine == 'pyttsx3':
        engine.say(chaptertext)
        engine.runAndWait()
def epub_to_text(outputtxt):
    """Write the paragraph text of every chapter in epub_doc to outputtxt.

    Chapters are the '.html' members of the epub archive, processed in
    numeric order of their names (the names must parse as integers). The
    output file is written as UTF-8 with a byte-order mark.
    """
    suffix = '.html'
    with open(outputtxt, 'w', encoding = 'utf-8-sig') as foutput, zipfile.ZipFile(epub_doc) as archive:
        stems = sorted(
            (str(member)[:-len(suffix)] for member in archive.namelist() if str(member).endswith(suffix)),
            key = int)
        for stem in stems:
            markup = archive.read(stem + suffix)
            foutput.write(parseepubtext(BeautifulSoup(markup, 'html.parser')))
if __name__ == "__main__":
    # Convert every '.html' chapter of the epub, in numeric order of its
    # name, into an audio file inside outputdirectory.
    with zipfile.ZipFile(epub_doc) as archive:
        members = archive.namelist()
        print(str(len(members)))
        htmlsuffix = '.html'
        chapternames = sorted(
            (str(member)[:-len(htmlsuffix)] for member in members if str(member).endswith(htmlsuffix)),
            key = int)
        #limitchapters = 100
        # Zero-pad output names so they sort in chapter order.
        padwidth = len(str(len(chapternames)))
        for chaptername in chapternames:
            #if int(chaptername) > 699 and int(chaptername) < 699 + limitchapters:
            print(chaptername)
            audiofile = os.path.join(outputdirectory, chaptername.zfill(padwidth) + '.opus')
            markup = archive.read(chaptername + htmlsuffix)
            chaptertext = parseepubtext(BeautifulSoup(markup, 'html.parser'))
            savetts(chaptertext, audiofile)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment