Skip to content

Instantly share code, notes, and snippets.

@siathalysedI
Forked from mouredev/voice_translator.py
Created August 1, 2024 17:33

Revisions

  1. @mouredev mouredev revised this gist Jul 30, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion voice_translator.py
    Original file line number Diff line number Diff line change
    @@ -91,7 +91,7 @@ def text_to_speach(text: str, language: str) -> str:
    ),
    )

    save_file_path = f"audios/{language}.mp3"
    save_file_path = f"{language}.mp3"

    with open(save_file_path, "wb") as f:
    for chunk in response:
  2. @mouredev mouredev created this gist Jul 30, 2024.
    125 changes: 125 additions & 0 deletions voice_translator.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,125 @@
    import gradio as gr
    import whisper
    from translate import Translator
    from dotenv import dotenv_values
    from elevenlabs.client import ElevenLabs
    from elevenlabs import VoiceSettings

    # requirements.txt
    """
    gradio
    openai-whisper
    translate
    python-dotenv
    elevenlabs
    """

    # .env configuration
    # Prefer the key stored in a local ".env" file so the secret does not have
    # to be written into the source; when the file or the entry is missing the
    # original placeholder is kept, so the script still starts as before.
    ELEVENLABS_API_KEY = (
        dotenv_values(".env").get("ELEVENLABS_API_KEY") or "MY_API_KEY"
    )


    def translator(audio_file):
        """Transcribe Spanish speech and return it spoken in four languages.

        The recording is transcribed with Whisper, the text is translated from
        Spanish to English, Italian, French and Japanese with the ``translate``
        package, and each translation is turned into an audio file through
        ``text_to_speach``.

        Args:
            audio_file: Path of the recorded audio to transcribe.

        Returns:
            A 4-tuple with the paths of the generated audio files, in the
            order English, Italian, French, Japanese.

        Raises:
            gr.Error: If no audio was provided, nothing could be transcribed,
                or the transcription, translation or speech step fails.
        """
        # Nothing to transcribe (e.g. the form was submitted without a
        # recording): fail early with a clear message instead of loading the
        # model first.
        if not audio_file:
            raise gr.Error("No se ha recibido ningún audio")

        # 1. Transcribe the text

        # We use Whisper: https://github.com/openai/whisper
        # Online API alternative: https://www.assemblyai.com

        try:
            model = whisper.load_model("base")
            result = model.transcribe(
                audio_file, language="Spanish", fp16=False)
            transcription = result["text"]
        except Exception as e:
            # Chain the cause so the original traceback is kept in the logs.
            raise gr.Error(
                f"Se ha producido un error transcribiendo el texto: {str(e)}"
            ) from e

        print(f"Texto original: {transcription}")

        # An empty transcription cannot be meaningfully translated or spoken,
        # so stop here instead of sending empty text to the external services.
        if not transcription.strip():
            raise gr.Error("No se ha detectado voz en el audio")

        # 2. Translate the text

        # We use Translate: https://github.com/terryyin/translate-python

        # (language code, label used in the log output), in output order.
        targets = (
            ("en", "Inglés"),
            ("it", "Italiano"),
            ("fr", "Francés"),
            ("ja", "Japonés"),
        )

        try:
            translations = [
                Translator(from_lang="es", to_lang=code).translate(
                    transcription)
                for code, _ in targets
            ]
        except Exception as e:
            raise gr.Error(
                f"Se ha producido un error traduciendo el texto: {str(e)}"
            ) from e

        for (_, label), translated in zip(targets, translations):
            print(f"Texto traducido a {label}: {translated}")

        # 3. Generate the translated audio

        # We use Elevenlabs IO: https://elevenlabs.io/docs/api-reference/getting-started

        return tuple(
            text_to_speach(translated, code)
            for (code, _), translated in zip(targets, translations)
        )


    def text_to_speach(text: str, language: str) -> str:
        """Convert ``text`` to speech with ElevenLabs and save it as an MP3.

        The audio is requested through the ElevenLabs client (voice "Adam",
        model ``eleven_turbo_v2``) and the chunks of the response are written
        to ``audios/<language>.mp3``.

        Args:
            text: Text to convert to speech.
            language: Language code; only used to name the output file.

        Returns:
            Path of the MP3 file that was written.

        Raises:
            gr.Error: If the output directory cannot be created, the request
                fails, or the file cannot be written.
        """
        import os  # local import so the block does not depend on file-level imports

        save_file_path = f"audios/{language}.mp3"

        try:
            # open() does not create missing parent directories, so make sure
            # the "audios" folder exists before writing into it.
            os.makedirs(os.path.dirname(save_file_path), exist_ok=True)

            client = ElevenLabs(api_key=ELEVENLABS_API_KEY)

            response = client.text_to_speech.convert(
                voice_id="pNInz6obpgDQGcFmaJgB",  # Adam
                optimize_streaming_latency="0",
                output_format="mp3_22050_32",
                text=text,
                model_id="eleven_turbo_v2",
                voice_settings=VoiceSettings(
                    stability=0.0,
                    similarity_boost=0.0,
                    style=0.0,
                    use_speaker_boost=True,
                ),
            )

            with open(save_file_path, "wb") as f:
                for chunk in response:
                    # Skip empty chunks; only real audio data is written.
                    if chunk:
                        f.write(chunk)

        except Exception as e:
            # Chain the cause so the original traceback is kept in the logs.
            raise gr.Error(
                f"Se ha producido un error creando el audio: {str(e)}"
            ) from e

        return save_file_path


    # Microphone recording of the Spanish speaker; the callback receives it
    # as a file path.
    spanish_input = gr.Audio(
        sources=["microphone"], type="filepath", label="Español")

    # One audio output per target language, in the same order in which
    # translator() returns the generated files.
    translated_outputs = [
        gr.Audio(label=language_label)
        for language_label in ("Inglés", "Italiano", "Francés", "Japonés")
    ]

    # Web UI wiring the recording to translator() and its four results.
    web = gr.Interface(
        fn=translator,
        inputs=spanish_input,
        outputs=translated_outputs,
        title="Traductor de voz",
        description="Traductor de voz con IA a varios idiomas",
    )

    web.launch()