Created
May 23, 2023 12:51
-
-
Save alonsosilvaallende/eb39663fee0269518ed3d6272e84bc93 to your computer and use it in GitHub Desktop.
Speech-to-text.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyO+6ZYbmTRHezugTy2Y9Kex", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"0c481e61ff12464198a1c0a399803b04": { | |
"model_module": "jupyter-webrtc", | |
"model_name": "AudioRecorderModel", | |
"model_module_version": "~0.6.0", | |
"state": { | |
"_data_src": "blob:https://292x9sbu7j-496ff2e9c6d22116-0-colab.googleusercontent.com/f8fbe5b6-092b-427a-b952-6666fce6d1cc", | |
"_dom_classes": [], | |
"_model_module": "jupyter-webrtc", | |
"_model_module_version": "~0.6.0", | |
"_model_name": "AudioRecorderModel", | |
"_view_count": null, | |
"_view_module": "jupyter-webrtc", | |
"_view_module_version": "~0.6.0", | |
"_view_name": "AudioRecorderView", | |
"audio": "IPY_MODEL_1688093242ea4342981d924c3ea31407", | |
"autosave": false, | |
"codecs": "", | |
"filename": "record", | |
"format": "webm", | |
"layout": "IPY_MODEL_e2f654db80b5452fbc02f41d4f18d1e1", | |
"recording": false, | |
"stream": "IPY_MODEL_4f22846605584f6e96e483ea1e7108ea" | |
} | |
}, | |
"1688093242ea4342981d924c3ea31407": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "AudioModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "AudioModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "AudioView", | |
"autoplay": true, | |
"controls": true, | |
"format": "webm", | |
"layout": "IPY_MODEL_c2fa706301a14321852ea6e69b5330c9", | |
"loop": true | |
} | |
}, | |
"e2f654db80b5452fbc02f41d4f18d1e1": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"4f22846605584f6e96e483ea1e7108ea": { | |
"model_module": "jupyter-webrtc", | |
"model_name": "CameraStreamModel", | |
"model_module_version": "~0.6.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "jupyter-webrtc", | |
"_model_module_version": "~0.6.0", | |
"_model_name": "CameraStreamModel", | |
"_view_count": null, | |
"_view_module": "jupyter-webrtc", | |
"_view_module_version": "~0.6.0", | |
"_view_name": "MediaStreamView", | |
"constraints": { | |
"audio": true, | |
"video": false | |
}, | |
"layout": "IPY_MODEL_333b2f46bb0c470e8d310c48ea574e20" | |
} | |
}, | |
"c2fa706301a14321852ea6e69b5330c9": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"333b2f46bb0c470e8d310c48ea574e20": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/alonsosilvaallende/eb39663fee0269518ed3d6272e84bc93/speech-to-text.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!pip install -q ipywebrtc" | |
], | |
"metadata": { | |
"id": "nUvwO-W8nKr7" | |
}, | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!pip install -q -U openai-whisper" | |
], | |
"metadata": { | |
"id": "Be8ejIOloQu6" | |
}, | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Para que Google Colab te deje correr widgets de otras librerías\n", | |
"from google.colab import output\n", | |
"output.enable_custom_widget_manager()" | |
], | |
"metadata": { | |
"id": "47kYix7nnUVH" | |
}, | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 94, | |
"referenced_widgets": [ | |
"0c481e61ff12464198a1c0a399803b04", | |
"1688093242ea4342981d924c3ea31407", | |
"e2f654db80b5452fbc02f41d4f18d1e1", | |
"4f22846605584f6e96e483ea1e7108ea", | |
"c2fa706301a14321852ea6e69b5330c9", | |
"333b2f46bb0c470e8d310c48ea574e20" | |
] | |
}, | |
"id": "cBLO1LUZmPZv", | |
"outputId": "3fb6ff18-3704-4bf6-a1e5-f977c900fad2" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"AudioRecorder(audio=Audio(value=b'', format='webm'), stream=CameraStream(constraints={'audio': True, 'video': …" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "0c481e61ff12464198a1c0a399803b04" | |
} | |
}, | |
"metadata": { | |
"application/vnd.jupyter.widget-view+json": { | |
"colab": { | |
"custom_widget_manager": { | |
"url": "https://ssl.gstatic.com/colaboratory-static/widgets/colab-cdn-widget-manager/b3e629b1971e1542/manager.min.js" | |
} | |
} | |
} | |
} | |
} | |
], | |
"source": [ | |
"# Graba alguna frase\n", | |
"from ipywebrtc import CameraStream, AudioStream, AudioRecorder\n", | |
"\n", | |
"camera = CameraStream(constraints={'audio': True,'video':False})\n", | |
"recorder = AudioRecorder(stream=camera)\n", | |
"recorder" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Guárdala (pienso que es innecesario pero dejémoslo así por ahora)\n", | |
"recorder.save('example0.webm')" | |
], | |
"metadata": { | |
"id": "FaYlhYvwnR2w" | |
}, | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import whisper" | |
], | |
"metadata": { | |
"id": "dIeKC3bAorGB" | |
}, | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# descarga el modelo: tiny, base, small, medium, large\n", | |
"model = whisper.load_model(\"tiny\")" | |
], | |
"metadata": { | |
"id": "kqzkHoylpJUF" | |
}, | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# carga el audio (innecesario aquí: la transcripción de abajo recibe la ruta del archivo, no esta variable)\n", | |
"audio = whisper.load_audio('example0.webm')" | |
], | |
"metadata": { | |
"id": "mEpGSMwonjbq" | |
}, | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"result = whisper.transcribe(model=model, audio='example0.webm', fp16=False, verbose=False)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "4C4ve0-aop6u", | |
"outputId": "53d5cae3-4d8c-4355-bed9-07a60e000203" | |
}, | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Detected language: Spanish\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"100%|██████████| 419/419 [00:01<00:00, 263.21frames/s]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"result['text']" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 36 | |
}, | |
"id": "tUZ5p46mpSr0", | |
"outputId": "fe3c3679-d3f0-4ea3-a91b-3483f612c9cf" | |
}, | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"' puedes escribir los versos más triste se están oche'" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 10 | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment