Skip to content

Instantly share code, notes, and snippets.

@alonsosilvaallende
Created May 23, 2023 12:51
Show Gist options
  • Save alonsosilvaallende/eb39663fee0269518ed3d6272e84bc93 to your computer and use it in GitHub Desktop.
Save alonsosilvaallende/eb39663fee0269518ed3d6272e84bc93 to your computer and use it in GitHub Desktop.
Speech-to-text.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyO+6ZYbmTRHezugTy2Y9Kex",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"0c481e61ff12464198a1c0a399803b04": {
"model_module": "jupyter-webrtc",
"model_name": "AudioRecorderModel",
"model_module_version": "~0.6.0",
"state": {
"_data_src": "blob:https://292x9sbu7j-496ff2e9c6d22116-0-colab.googleusercontent.com/f8fbe5b6-092b-427a-b952-6666fce6d1cc",
"_dom_classes": [],
"_model_module": "jupyter-webrtc",
"_model_module_version": "~0.6.0",
"_model_name": "AudioRecorderModel",
"_view_count": null,
"_view_module": "jupyter-webrtc",
"_view_module_version": "~0.6.0",
"_view_name": "AudioRecorderView",
"audio": "IPY_MODEL_1688093242ea4342981d924c3ea31407",
"autosave": false,
"codecs": "",
"filename": "record",
"format": "webm",
"layout": "IPY_MODEL_e2f654db80b5452fbc02f41d4f18d1e1",
"recording": false,
"stream": "IPY_MODEL_4f22846605584f6e96e483ea1e7108ea"
}
},
"1688093242ea4342981d924c3ea31407": {
"model_module": "@jupyter-widgets/controls",
"model_name": "AudioModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "AudioModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "AudioView",
"autoplay": true,
"controls": true,
"format": "webm",
"layout": "IPY_MODEL_c2fa706301a14321852ea6e69b5330c9",
"loop": true
}
},
"e2f654db80b5452fbc02f41d4f18d1e1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4f22846605584f6e96e483ea1e7108ea": {
"model_module": "jupyter-webrtc",
"model_name": "CameraStreamModel",
"model_module_version": "~0.6.0",
"state": {
"_dom_classes": [],
"_model_module": "jupyter-webrtc",
"_model_module_version": "~0.6.0",
"_model_name": "CameraStreamModel",
"_view_count": null,
"_view_module": "jupyter-webrtc",
"_view_module_version": "~0.6.0",
"_view_name": "MediaStreamView",
"constraints": {
"audio": true,
"video": false
},
"layout": "IPY_MODEL_333b2f46bb0c470e8d310c48ea574e20"
}
},
"c2fa706301a14321852ea6e69b5330c9": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"333b2f46bb0c470e8d310c48ea574e20": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/alonsosilvaallende/eb39663fee0269518ed3d6272e84bc93/speech-to-text.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"!pip install -q ipywebrtc"
],
"metadata": {
"id": "nUvwO-W8nKr7"
},
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!pip install -q -U openai-whisper"
],
"metadata": {
"id": "Be8ejIOloQu6"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Para que Google Colab te deje correr widgets de otras librerías\n",
"from google.colab import output\n",
"output.enable_custom_widget_manager()"
],
"metadata": {
"id": "47kYix7nnUVH"
},
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 94,
"referenced_widgets": [
"0c481e61ff12464198a1c0a399803b04",
"1688093242ea4342981d924c3ea31407",
"e2f654db80b5452fbc02f41d4f18d1e1",
"4f22846605584f6e96e483ea1e7108ea",
"c2fa706301a14321852ea6e69b5330c9",
"333b2f46bb0c470e8d310c48ea574e20"
]
},
"id": "cBLO1LUZmPZv",
"outputId": "3fb6ff18-3704-4bf6-a1e5-f977c900fad2"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"AudioRecorder(audio=Audio(value=b'', format='webm'), stream=CameraStream(constraints={'audio': True, 'video': …"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "0c481e61ff12464198a1c0a399803b04"
}
},
"metadata": {
"application/vnd.jupyter.widget-view+json": {
"colab": {
"custom_widget_manager": {
"url": "https://ssl.gstatic.com/colaboratory-static/widgets/colab-cdn-widget-manager/b3e629b1971e1542/manager.min.js"
}
}
}
}
}
],
"source": [
"# Graba alguna frase\n",
"from ipywebrtc import CameraStream, AudioStream, AudioRecorder\n",
"\n",
"camera = CameraStream(constraints={'audio': True,'video':False})\n",
"recorder = AudioRecorder(stream=camera)\n",
"recorder"
]
},
{
"cell_type": "code",
"source": [
"# Guárdala (pienso que es innecesario pero dejémoslo así por ahora)\n",
"recorder.save('example0.webm')"
],
"metadata": {
"id": "FaYlhYvwnR2w"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import whisper"
],
"metadata": {
"id": "dIeKC3bAorGB"
},
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# descarga el modelo: tiny, base, small, medium, large\n",
"model = whisper.load_model(\"tiny\")"
],
"metadata": {
"id": "kqzkHoylpJUF"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# carga el audio (pienso que es innecesario)\n",
"audio = whisper.load_audio('example0.webm')"
],
"metadata": {
"id": "mEpGSMwonjbq"
},
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"source": [
"result = whisper.transcribe(model=model, audio='example0.webm', fp16=False, verbose=False)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4C4ve0-aop6u",
"outputId": "53d5cae3-4d8c-4355-bed9-07a60e000203"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Detected language: Spanish\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"100%|██████████| 419/419 [00:01<00:00, 263.21frames/s]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"result['text']"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 36
},
"id": "tUZ5p46mpSr0",
"outputId": "fe3c3679-d3f0-4ea3-a91b-3483f612c9cf"
},
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"' puedes escribir los versos más triste se están oche'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 10
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment