Skip to content

Instantly share code, notes, and snippets.

@rickkk856
Last active October 27, 2021 12:43
Show Gist options
  • Select an option

  • Save rickkk856/c60a76a213e246793de7fe18089fb824 to your computer and use it in GitHub Desktop.

Select an option

Save rickkk856/c60a76a213e246793de7fe18089fb824 to your computer and use it in GitHub Desktop.
Transcrever áudio - mp3 to txt (pt-PT, pt-BR, en-US) - GoogleColab (.wav Curtos e Longos)
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Transcript (mp3-to-txt) pt-BR.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "58gVBa2acRsp"
},
"source": [
"# Transcrever audio - mp3 to txt \n",
"\n",
"pt-PT, pt-BR, en-US\n",
"\n",
"[Source 1](https://github.com/estelasouza/transcricao-de-video-em-texto), [Source 2](https://github.com/binhojulix/audio-para-texto/blob/main/audio-to-texto.ipynb)"
]
},
{
"cell_type": "code",
"metadata": {
"id": "nCS32jvPdNkl"
},
"source": [
"!pip install SpeechRecognition\n",
"!pip install moviepy\n",
"!pip install pydub\n",
"!sudo apt install ffmpeg"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "xSB1cnD9cSuJ"
},
"source": [
"# importing libraries \n",
"import moviepy.editor as mp\n",
"import speech_recognition as sr\n",
"import moviepy.editor as mp\n",
"import sys\n",
"from pydub import AudioSegment\n",
"import os \n",
"from pydub.silence import split_on_silence"
],
"execution_count": 41,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HzuwINRBdIb9",
"outputId": "68167f63-65c0-478a-9200-3786a3cb85fe"
},
"source": [
"#@markdown SRC is mp3 file of your audio (.mp3)\n",
"src = \"/content/Audio Completo.mp3\" #@param {type:\"string\"}\n",
"\n",
"#@markdown TARGET is the audio that will be used as input to the model (.wav)\n",
"Target = \"/content/Audio Completo.wav\" #@param {type:\"string\"}\n",
"#Use converted wav file\n",
"filename = Target\n",
"\n",
"#@markdown TXT_OUTPUT is the audio transcript (.txt)\n",
"txt_output = \"/content/Audio Completo.txt\" #@param {type:\"string\"}\n",
"\n",
"\n",
"sound = AudioSegment.from_mp3(src)\n",
"sound.export(Target, format=\"wav\")\n",
"\n",
"#@markdown More Supported Languages at [this link](https://stackoverflow.com/questions/14257598/what-are-language-codes-in-chromes-implementation-of-the-html5-speech-recogniti/14302134#14302134)\n",
"language_select = \"pt-BR\" #@param [\"pt-PT\", \"pt-BR\", \"en-US\"]\n",
"\n",
"print(src)\n",
"print(Target)\n",
"print(txt_output)\n",
"print(language_select)\n"
],
"execution_count": 51,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/Audio Completo.mp3\n",
"/content/Audio Completo.wav\n",
"/content/Audio Completo.txt\n",
"pt-PT\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "z6nWvRxWcVB4"
},
"source": [
"# initialize the recognizer\n",
"r = sr.Recognizer()"
],
"execution_count": 52,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 518
},
"id": "xSyDmMHicWcS",
"outputId": "f1507510-2c43-4694-8348-df3407363c81"
},
"source": [
"#@markdown Code for short audios, long audios will return an [Errno 32] Broken pipe\n",
"# open the file\n",
"with sr.AudioFile(filename) as source:\n",
" # listen for the data (load audio to memory)\n",
" audio_data = r.record(source)\n",
" # recognize (convert from speech to text)\n",
" text = r.recognize_google(audio_data,language=language_select))\n",
" print(text)"
],
"execution_count": 53,
"outputs": [
{
"output_type": "error",
"ename": "RequestError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mBrokenPipeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/usr/lib/python3.7/urllib/request.py\u001b[0m in \u001b[0;36mdo_open\u001b[0;34m(self, http_class, req, **http_conn_args)\u001b[0m\n\u001b[1;32m 1349\u001b[0m h.request(req.get_method(), req.selector, req.data, headers,\n\u001b[0;32m-> 1350\u001b[0;31m encode_chunked=req.has_header('Transfer-encoding'))\n\u001b[0m\u001b[1;32m 1351\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mOSError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# timeout error\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1280\u001b[0m \u001b[0;34m\"\"\"Send a complete request to the server.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1281\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1282\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36m_send_request\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1326\u001b[0m \u001b[0mbody\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'body'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1327\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mendheaders\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1328\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36mendheaders\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1275\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mCannotSendHeader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1276\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage_body\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1277\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36m_send_output\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1074\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34mb'\\r\\n'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1075\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1076\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 996\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 997\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msendall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 998\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mBrokenPipeError\u001b[0m: [Errno 32] Broken pipe",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mURLError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/speech_recognition/__init__.py\u001b[0m in \u001b[0;36mrecognize_google\u001b[0;34m(self, audio_data, key, language, show_all)\u001b[0m\n\u001b[1;32m 839\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 840\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0murlopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moperation_timeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 841\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mHTTPError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.7/urllib/request.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(url, data, timeout, cafile, capath, cadefault, context)\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[0mopener\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_opener\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mopener\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 223\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.7/urllib/request.py\u001b[0m in \u001b[0;36mopen\u001b[0;34m(self, fullurl, data, timeout)\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_open\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.7/urllib/request.py\u001b[0m in \u001b[0;36m_open\u001b[0;34m(self, req, data)\u001b[0m\n\u001b[1;32m 542\u001b[0m result = self._call_chain(self.handle_open, protocol, protocol +\n\u001b[0;32m--> 543\u001b[0;31m '_open', req)\n\u001b[0m\u001b[1;32m 544\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.7/urllib/request.py\u001b[0m in \u001b[0;36m_call_chain\u001b[0;34m(self, chain, kind, meth_name, *args)\u001b[0m\n\u001b[1;32m 502\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhandler\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeth_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 503\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 504\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.7/urllib/request.py\u001b[0m in \u001b[0;36mhttp_open\u001b[0;34m(self, req)\u001b[0m\n\u001b[1;32m 1377\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mhttp_open\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1378\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_open\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhttp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mHTTPConnection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1379\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.7/urllib/request.py\u001b[0m in \u001b[0;36mdo_open\u001b[0;34m(self, http_class, req, **http_conn_args)\u001b[0m\n\u001b[1;32m 1351\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mOSError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# timeout error\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1352\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mURLError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1353\u001b[0m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mURLError\u001b[0m: <urlopen error [Errno 32] Broken pipe>",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mRequestError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-53-5deb6fd37fb6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0maudio_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecord\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# recognize (convert from speech to text)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mtext\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecognize_google\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/speech_recognition/__init__.py\u001b[0m in \u001b[0;36mrecognize_google\u001b[0;34m(self, audio_data, key, language, show_all)\u001b[0m\n\u001b[1;32m 842\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mRequestError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"recognition request failed: {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreason\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 843\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mURLError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 844\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRequestError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"recognition connection failed: {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreason\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 845\u001b[0m \u001b[0mresponse_text\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 846\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mRequestError\u001b[0m: recognition connection failed: [Errno 32] Broken pipe"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "dKWqQpxycagV"
},
"source": [
"# create a speech recognition object\n",
"r = sr.Recognizer()"
],
"execution_count": 66,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "RMmhhjx1ccY4"
},
"source": [
"# a function that splits the audio file into chunks\n",
"# and applies speech recognition\n",
"def get_large_audio_transcription(path):\n",
" \"\"\"\n",
" Splitting the large audio file into chunks\n",
" and apply speech recognition on each of these chunks\n",
" \"\"\"\n",
" # open the audio file using pydub\n",
" sound = AudioSegment.from_wav(path) \n",
" # split audio sound where silence is 700 miliseconds or more and get chunks\n",
" chunks = split_on_silence(sound,\n",
" # experiment with this value for your target audio file\n",
" min_silence_len = 500,\n",
" # adjust this per requirement\n",
" silence_thresh = sound.dBFS-14,\n",
" # keep the silence for 1 second, adjustable as well\n",
" keep_silence=500,\n",
" )\n",
" folder_name = \"audio\"\n",
" # create a directory to store the audio chunks\n",
" if not os.path.isdir(folder_name):\n",
" os.mkdir(folder_name)\n",
" whole_text = \"\"\n",
" # process each chunk \n",
" for i, audio_chunk in enumerate(chunks, start=1):\n",
" # export audio chunk and save it in\n",
" # the `folder_name` directory.\n",
" chunk_filename = os.path.join(folder_name, f\"split{i}.wav\")\n",
" audio_chunk.export(chunk_filename, format=\"wav\")\n",
" # recognize the chunk\n",
" with sr.AudioFile(chunk_filename) as source:\n",
" audio_listened = r.record(source)\n",
" # try converting it to text\n",
" try:\n",
" text = r.recognize_google(audio_listened,language=language_select)\n",
" #text = r.recognize_google(audio_listened)\n",
" except sr.UnknownValueError as e:\n",
" print(\"Error:\", chunk_filename, str(e))\n",
" else:\n",
" text = f\"{text.capitalize()}. \"\n",
" out_txt = (\"Speaker__:\", text)\n",
" print(out_txt)\n",
" whole_text += text\n",
" arq = open(txt_output,'w')\n",
" arq.write(str(out_txt),\"\\n\")\n",
" arq.close()\n",
" # return the text for all chunks detected\n",
" return whole_text"
],
"execution_count": 67,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Pp7cj3MCcfim"
},
"source": [
"#Print Complete Audio Transcript\n",
"path = Target\n",
"print(\"\\nFull text:\", get_large_audio_transcription(path))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "g3D9zhZDjQAu",
"outputId": "2dd75a33-d0fb-468a-9548-0ff9e9461121"
},
"source": [
"#Delete audio Chunks\n",
"!rm -r /content/audio\n",
"print(\"audio-chunks deleted\")"
],
"execution_count": 48,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"rm: cannot remove '/content/audio-chunks': No such file or directory\n",
"audio-chunks deleted\n"
]
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment