Created
June 24, 2024 10:43
-
-
Save phu54321/ca8a957ad41f58cded34823fca1f2afc to your computer and use it in GitHub Desktop.
Faster Whisper Colab Runner.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"gpuType": "T4", | |
"authorship_tag": "ABX9TyM4vtPG2FtEpuoNrjcGGBMr", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"accelerator": "GPU", | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"1176f9c23d5a475b894c01998ce80114": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_17fcf507e79542f197706d006cc70cfc", | |
"max": 19.0635625, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_b2845afe89344d0da49b349a2b95d355", | |
"value": 19.0635625 | |
} | |
}, | |
"17fcf507e79542f197706d006cc70cfc": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"b2845afe89344d0da49b349a2b95d355": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/phu54321/ca8a957ad41f58cded34823fca1f2afc/faster-whisper-colab-runner.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"collapsed": true, | |
"id": "vO9qJvQI9iZS", | |
"outputId": "59bd4943-7790-475e-bcbd-63f399ffcbc7", | |
"cellView": "form" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Collecting faster-whisper\n", | |
" Downloading faster_whisper-1.0.2-py3-none-any.whl (1.5 MB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hCollecting yt-dlp\n", | |
" Downloading yt_dlp-2024.5.27-py3-none-any.whl (3.1 MB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m36.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hCollecting av<13,>=11.0 (from faster-whisper)\n", | |
" Downloading av-12.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.3 MB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.3/34.3 MB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hCollecting ctranslate2<5,>=4.0 (from faster-whisper)\n", | |
" Downloading ctranslate2-4.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (192.3 MB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m192.3/192.3 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hRequirement already satisfied: huggingface-hub>=0.13 in /usr/local/lib/python3.10/dist-packages (from faster-whisper) (0.23.4)\n", | |
"Requirement already satisfied: tokenizers<1,>=0.13 in /usr/local/lib/python3.10/dist-packages (from faster-whisper) (0.19.1)\n", | |
"Collecting onnxruntime<2,>=1.14 (from faster-whisper)\n", | |
" Downloading onnxruntime-1.18.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m39.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hCollecting brotli (from yt-dlp)\n", | |
" Downloading Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.0 MB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hRequirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2024.6.2)\n", | |
"Collecting mutagen (from yt-dlp)\n", | |
" Downloading mutagen-1.47.0-py3-none-any.whl (194 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.4/194.4 kB\u001b[0m \u001b[31m27.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hCollecting pycryptodomex (from yt-dlp)\n", | |
" Downloading pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hRequirement already satisfied: requests<3,>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2.31.0)\n", | |
"Requirement already satisfied: urllib3<3,>=1.26.17 in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2.0.7)\n", | |
"Collecting websockets>=12.0 (from yt-dlp)\n", | |
" Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (67.7.2)\n", | |
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (1.25.2)\n", | |
"Requirement already satisfied: pyyaml<7,>=5.3 in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (6.0.1)\n", | |
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (3.15.1)\n", | |
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (2023.6.0)\n", | |
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (24.1)\n", | |
"Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (4.66.4)\n", | |
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (4.12.2)\n", | |
"Collecting coloredlogs (from onnxruntime<2,>=1.14->faster-whisper)\n", | |
" Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (24.3.25)\n", | |
"Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (3.20.3)\n", | |
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (1.12.1)\n", | |
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.31.0->yt-dlp) (3.3.2)\n", | |
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.31.0->yt-dlp) (3.7)\n", | |
"Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime<2,>=1.14->faster-whisper)\n", | |
" Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hRequirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime<2,>=1.14->faster-whisper) (1.3.0)\n", | |
"Installing collected packages: brotli, websockets, pycryptodomex, mutagen, humanfriendly, ctranslate2, av, yt-dlp, coloredlogs, onnxruntime, faster-whisper\n", | |
"Successfully installed av-12.1.0 brotli-1.1.0 coloredlogs-15.0.1 ctranslate2-4.3.1 faster-whisper-1.0.2 humanfriendly-10.0 mutagen-1.47.0 onnxruntime-1.18.0 pycryptodomex-3.20.0 websockets-12.0 yt-dlp-2024.5.27\n" | |
] | |
} | |
], | |
"source": [ | |
"#@title Install dependencies\n", | |
"# %pip (instead of !pip) installs into the environment of the running kernel.\n", | |
"%pip install faster-whisper yt-dlp" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"#@title Transcribe audio to .srt file\n", | |
"\n", | |
"from ipywidgets import FloatProgress\n", | |
"from datetime import datetime\n", | |
"import os\n", | |
"from faster_whisper import WhisperModel\n", | |
"\n", | |
"# Load the \"medium\" Whisper model once for the whole notebook. It is configured for a CUDA\n", | |
"# device with float16 compute, so this cell needs a GPU runtime.\n", | |
"model = WhisperModel(\"medium\", device=\"cuda\", compute_type=\"float16\")\n", | |
"\n", | |
"# Directory that receives the generated .srt files; created here if it does not exist yet.\n", | |
"srtOutputDir = \"outputs\"\n", | |
"os.makedirs(srtOutputDir, exist_ok=True)\n", | |
"\n", | |
"def timeformat_srt(time):\n", | |
"    \"\"\"Format a time offset given in seconds as an SRT timestamp, HH:MM:SS,mmm.\n", | |
"\n", | |
"    The offset is rounded to whole milliseconds before it is split, so float noise\n", | |
"    (2.3 is stored as 2.2999...) cannot drop a millisecond and a value such as\n", | |
"    59.9996 carries over cleanly into the next minute.\n", | |
"\n", | |
"    Args:\n", | |
"        time: Offset in seconds (int or float).\n", | |
"\n", | |
"    Returns:\n", | |
"        The timestamp string, e.g. \"00:00:02,300\".\n", | |
"    \"\"\"\n", | |
"    total_ms = int(round(time * 1000))\n", | |
"    hours, rest = divmod(total_ms, 3_600_000)\n", | |
"    minutes, rest = divmod(rest, 60_000)\n", | |
"    seconds, milliseconds = divmod(rest, 1000)\n", | |
"    return f\"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}\"\n", | |
"\n", | |
"def transcribe(filename, language=None):\n", | |
"    \"\"\"Transcribe an audio file with the notebook-level Whisper model and write an .srt file.\n", | |
"\n", | |
"    Each segment is also printed with its start time, and a progress bar is advanced\n", | |
"    to the end time of the latest segment.\n", | |
"\n", | |
"    Args:\n", | |
"        filename: Path of the audio file; its base name is reused for the .srt file name.\n", | |
"        language: Forwarded unchanged to model.transcribe (default None).\n", | |
"\n", | |
"    Returns:\n", | |
"        (srtPath, srtOutput): path of the written subtitle file and its full text.\n", | |
"    \"\"\"\n", | |
"    fileBasename = os.path.basename(filename)\n", | |
"\n", | |
"    segments, info = model.transcribe(filename, beam_size=5, language=language)\n", | |
"\n", | |
"    # The bar runs from 0 to the duration reported by the model.\n", | |
"    pbar = FloatProgress(min=0, max=info.duration)\n", | |
"    display(pbar)\n", | |
"\n", | |
"    srtOutputChunks = []\n", | |
"    # SubRip cue numbers start at 1.\n", | |
"    for cueNumber, segment in enumerate(segments, start=1):\n", | |
"        start = timeformat_srt(segment.start)\n", | |
"        end = timeformat_srt(segment.end)\n", | |
"        text = segment.text.strip()\n", | |
"        srtOutputChunks.append(f\"{cueNumber}\\n{start} --> {end}\\n{text}\\n\")\n", | |
"        print(f\"[{start}] {text}\")\n", | |
"        if segment.end is not None:\n", | |
"            pbar.value = segment.end\n", | |
"\n", | |
"    # Fill the bar even when the last segment ends before the reported duration.\n", | |
"    pbar.value = info.duration\n", | |
"\n", | |
"    base, _ = os.path.splitext(fileBasename)\n", | |
"    # Suffix is date plus hour-minute-second of the moment the file is written.\n", | |
"    stamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", | |
"    srtFilename = '%s_%s.srt' % (base, stamp)\n", | |
"    srtPath = os.path.join(srtOutputDir, srtFilename)\n", | |
"    srtOutput = \"\\n\".join(srtOutputChunks)\n", | |
"    # Explicit UTF-8 so non-ASCII transcripts are written the same way on every platform.\n", | |
"    with open(srtPath, 'w', encoding='utf-8') as wf:\n", | |
"        wf.write(srtOutput)\n", | |
"\n", | |
"    return srtPath, srtOutput\n" | |
], | |
"metadata": { | |
"id": "GJkXP5i6-Zy-", | |
"cellView": "form" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"#@title Get audio with yt-dlp\n", | |
"\n", | |
"import yt_dlp\n", | |
"\n", | |
"def getYoutubeAudio(url):\n", | |
"    \"\"\"Download the audio of `url` with yt-dlp and return the file name seen by the progress hook.\n", | |
"\n", | |
"    Returns None when no 'finished' progress event was received.\n", | |
"\n", | |
"    NOTE(review): the name is captured from the download progress hook, i.e. before the\n", | |
"    FFmpegExtractAudio post-processor runs. If that step writes a file with a different\n", | |
"    extension, the returned name may be stale -- confirm against yt-dlp's behaviour.\n", | |
"    \"\"\"\n", | |
"    audio_path = None\n", | |
"\n", | |
"    def on_progress(event):\n", | |
"        # Only the 'finished' event is used; every other status is ignored.\n", | |
"        nonlocal audio_path\n", | |
"        if event['status'] != 'finished':\n", | |
"            return\n", | |
"        audio_path = event.get('info_dict').get('_filename')\n", | |
"\n", | |
"    extract_audio = {  # Extract audio using ffmpeg\n", | |
"        'key': 'FFmpegExtractAudio',\n", | |
"        'preferredcodec': 'm4a',\n", | |
"    }\n", | |
"    options = {\n", | |
"        'format': 'm4a/bestaudio/best',\n", | |
"        'progress_hooks': [on_progress],\n", | |
"        'postprocessors': [extract_audio],\n", | |
"    }\n", | |
"\n", | |
"    with yt_dlp.YoutubeDL(options) as downloader:\n", | |
"        downloader.download([url])\n", | |
"\n", | |
"    return audio_path\n" | |
], | |
"metadata": { | |
"id": "jj5kP8q8w8k0", | |
"cellView": "form" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from google.colab import files\n", | |
"\n", | |
"# Video to transcribe; the trailing @param marker exposes this value as a Colab form field.\n", | |
"url = 'https://www.youtube.com/watch?v=jNQXAC9IVRw' # @param {type:\"string\"}\n", | |
"# Download the audio, transcribe it to an .srt file, then hand that file to files.download.\n", | |
"fname = getYoutubeAudio(url)\n", | |
"srtPath, srtOutput = transcribe(fname)\n", | |
"files.download(srtPath)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 281, | |
"referenced_widgets": [ | |
"1176f9c23d5a475b894c01998ce80114", | |
"17fcf507e79542f197706d006cc70cfc", | |
"b2845afe89344d0da49b349a2b95d355" | |
] | |
}, | |
"id": "jeeA9cGnAlSD", | |
"outputId": "e2bac65b-967b-42f3-fa96-b375f24c6fe4" | |
}, | |
"execution_count": 36, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"[youtube] Extracting URL: https://www.youtube.com/watch?v=jNQXAC9IVRw\n", | |
"[youtube] jNQXAC9IVRw: Downloading webpage\n", | |
"[youtube] jNQXAC9IVRw: Downloading ios player API JSON\n", | |
"[youtube] jNQXAC9IVRw: Downloading m3u8 information\n", | |
"[info] jNQXAC9IVRw: Downloading 1 format(s): 140\n", | |
"[download] Destination: Me at the zoo [jNQXAC9IVRw].m4a\n", | |
"[download] 100% of 301.95KiB in 00:00:00 at 7.80MiB/s \n", | |
"[FixupM4a] Correcting container of \"Me at the zoo [jNQXAC9IVRw].m4a\"\n", | |
"[ExtractAudio] Not converting audio Me at the zoo [jNQXAC9IVRw].m4a; file is already in target format m4a\n" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"FloatProgress(value=0.0, max=19.0635625)" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "1176f9c23d5a475b894c01998ce80114" | |
} | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"[00:00:00,000] Alright, so here we are in front of the elephants.\n", | |
"[00:00:05,000] The cool thing about these guys is that they have really, really, really long trunks.\n", | |
"[00:00:13,000] And that's cool.\n", | |
"[00:00:16,000] And that's pretty much all there is to say.\n" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Javascript object>" | |
], | |
"application/javascript": [ | |
"\n", | |
" async function download(id, filename, size) {\n", | |
" if (!google.colab.kernel.accessAllowed) {\n", | |
" return;\n", | |
" }\n", | |
" const div = document.createElement('div');\n", | |
" const label = document.createElement('label');\n", | |
" label.textContent = `Downloading \"${filename}\": `;\n", | |
" div.appendChild(label);\n", | |
" const progress = document.createElement('progress');\n", | |
" progress.max = size;\n", | |
" div.appendChild(progress);\n", | |
" document.body.appendChild(div);\n", | |
"\n", | |
" const buffers = [];\n", | |
" let downloaded = 0;\n", | |
"\n", | |
" const channel = await google.colab.kernel.comms.open(id);\n", | |
" // Send a message to notify the kernel that we're ready.\n", | |
" channel.send({})\n", | |
"\n", | |
" for await (const message of channel.messages) {\n", | |
" // Send a message to notify the kernel that we're ready.\n", | |
" channel.send({})\n", | |
" if (message.buffers) {\n", | |
" for (const buffer of message.buffers) {\n", | |
" buffers.push(buffer);\n", | |
" downloaded += buffer.byteLength;\n", | |
" progress.value = downloaded;\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" const blob = new Blob(buffers, {type: 'application/binary'});\n", | |
" const a = document.createElement('a');\n", | |
" a.href = window.URL.createObjectURL(blob);\n", | |
" a.download = filename;\n", | |
" div.appendChild(a);\n", | |
" a.click();\n", | |
" div.remove();\n", | |
" }\n", | |
" " | |
] | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Javascript object>" | |
], | |
"application/javascript": [ | |
"download(\"download_09704e79-c4e4-4ade-a34c-b46457c6d60a\", \"Me at the zoo [jNQXAC9IVRw]_20240624_104242.srt\", 329)" | |
] | |
}, | |
"metadata": {} | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment