Created
December 22, 2023 19:01
-
-
Save davigamer987/29130976ec46e5d60620187ffd5dd36f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import tortoise | |
from tortoise import api | |
import requests | |
import speech_recognition as sr | |
from googletrans import Translator | |
from pydub import AudioSegment | |
from pydub.playback import play | |
def convert_ogg_to_wav(ogg_file, wav_file):
    """Transcode an OGG audio file to WAV.

    Args:
        ogg_file: OGG input handed to ``AudioSegment.from_ogg``.
        wav_file: Destination handed to ``AudioSegment.export``.
    """
    AudioSegment.from_ogg(ogg_file).export(wav_file, format="wav")
def recognize_audio(audio_file, target_language='pt', source_language='en'):
    """Transcribe the speech contained in an OGG file.

    The file is converted to WAV with ``convert_ogg_to_wav``, loaded through
    ``sr.AudioFile`` and sent to ``Recognizer.recognize_google``.

    Args:
        audio_file: Path of the OGG file to transcribe.
        target_language: Unused; kept so existing callers keep working.
        source_language: Language code of the spoken audio, forwarded to the
            recognizer. Defaults to ``'en'``.

    Returns:
        The recognized text, or ``None`` when the recognizer raises
        ``sr.UnknownValueError`` (speech could not be understood).

    Raises:
        Any other recognizer error (for example ``sr.RequestError``) is not
        handled here and propagates to the caller.
    """
    import tempfile

    recognizer = sr.Recognizer()

    # A private scratch directory avoids overwriting an unrelated "temp.wav"
    # in the current working directory and guarantees the intermediate file
    # is removed afterwards, even if conversion or loading fails.
    with tempfile.TemporaryDirectory() as scratch_dir:
        wav_file = os.path.join(scratch_dir, "temp.wav")
        convert_ogg_to_wav(audio_file, wav_file)

        # Read the whole WAV file into memory before the directory is removed.
        with sr.AudioFile(wav_file) as source:
            audio_data = recognizer.record(source)

    # Recognize (convert from speech to text)
    try:
        return recognizer.recognize_google(audio_data, language=source_language)
    except sr.UnknownValueError:
        return None
def translate_text(text):
    """Translate *text* to Portuguese (``dest='pt'``) using ``Translator``.

    Args:
        text: The text to translate.

    Returns:
        The translated string, or the literal ``"Failed To Translate"`` when
        the translation raises any exception.
    """
    try:
        # Built inside the ``try`` so a failure while setting up the
        # translator gets the same fallback as a failed request.
        translator = Translator()
        return translator.translate(text, dest='pt').text
    except Exception as e:
        print(f"Error during translation: {e}")
        # Return the fallback explicitly so callers never receive None.
        return "Failed To Translate"
_TTS_ENGINE = None  # TextToSpeech instance, created lazily and shared across calls


def _get_tts_engine():
    """Return the shared TextToSpeech engine, creating it on first use."""
    global _TTS_ENGINE
    if _TTS_ENGINE is None:
        _TTS_ENGINE = api.TextToSpeech(kv_cache=True, half=True)
    return _TTS_ENGINE


def generate_audio(translated_text, output_file='output.wav', lang='pt'):
    """Synthesize *translated_text* with Tortoise TTS and save it as OGG/Vorbis.

    Args:
        translated_text: The text to speak. Must be a non-empty string.
        output_file: Destination path. The audio is always encoded as
            OGG/Vorbis regardless of the extension in this path.
        lang: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If *translated_text* is empty or ``None``.
    """
    if not translated_text:
        raise ValueError("generate_audio() needs non-empty text to synthesize")

    from tortoise.utils.audio import load_voice

    # Tortoise selects a voice through reference samples / conditioning
    # latents rather than a ``voice=`` keyword, so load the "tom" voice first.
    voice_samples, conditioning_latents = load_voice("tom")
    waveform = _get_tts_engine().tts_with_preset(
        translated_text,
        voice_samples=voice_samples,
        conditioning_latents=conditioning_latents,
        preset='fast',
    )

    # The engine returns a float waveform tensor (24 kHz mono per the
    # tortoise-tts documentation), not an AudioSegment. Convert it to 16-bit
    # PCM bytes so pydub can encode it.
    pcm16 = (
        waveform.detach().cpu().float().squeeze()
        .clamp(-1.0, 1.0).mul(32767).short().numpy().tobytes()
    )
    segment = AudioSegment(data=pcm16, sample_width=2, frame_rate=24000, channels=1)
    segment.export(output_file, format='ogg', codec='libvorbis')
def process_audio_folder(folder_path):
    """Translate the speech of every ``.ogg`` file found under *folder_path*.

    The folder is walked recursively. Each file whose name ends in ``.ogg``
    (case-insensitive) goes through speech-to-text, translation and
    text-to-speech. The result is written in the same directory under the same
    stem with a lowercase ``.ogg`` extension, so a source file already named
    ``*.ogg`` is replaced in place.

    Files whose speech cannot be recognized, or whose translation comes back
    empty, are reported and skipped.

    Args:
        folder_path: Root directory to scan.
    """
    for root, _, files in os.walk(folder_path):
        for filename in files:
            if not filename.lower().endswith('.ogg'):
                continue

            audio_file_path = os.path.join(root, filename)
            stem = os.path.splitext(filename)[0]
            translated_audio_path = os.path.join(root, f"{stem}.ogg")

            # Step 1: Speech to Text
            recognized_text = recognize_audio(audio_file_path)
            if not recognized_text:
                print(f"Could not understand audio from {audio_file_path}")
                continue
            print(f"Recognized text from {audio_file_path}: {recognized_text}")

            # Step 2: Translation
            translated_text = translate_text(recognized_text)
            if not translated_text:
                # Without text there is nothing to synthesize; leave the
                # source file untouched instead of passing None to the TTS step.
                print(f"Could not translate text from {audio_file_path}")
                continue
            print(f"Translated text: {translated_text}")

            # Step 3: Text to Speech
            generate_audio(translated_text, output_file=translated_audio_path)
            print(f"Translated audio saved to {translated_audio_path}")
# Example usage: pass the folder to process as the first command-line
# argument, or omit it to fall back to the default folder below.
if __name__ == "__main__":
    import sys

    default_folder = '/home/davi/Documents/The Jackbox Party Pack 7/games/Quiplash3/content/Quiplash3Round1Question/'
    audio_folder_path = sys.argv[1] if len(sys.argv) > 1 else default_folder
    process_audio_folder(audio_folder_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment