phu54321 · June 24, 2024 10:43
diff --git a/faster-whisper-colab-runner.ipynb b/faster-whisper-colab-runner.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4",
      "authorship_tag": "ABX9TyM4vtPG2FtEpuoNrjcGGBMr",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "1176f9c23d5a475b894c01998ce80114": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_17fcf507e79542f197706d006cc70cfc",
            "max": 19.0635625,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_b2845afe89344d0da49b349a2b95d355",
            "value": 19.0635625
          }
        },
        "17fcf507e79542f197706d006cc70cfc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b2845afe89344d0da49b349a2b95d355": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/phu54321/ca8a957ad41f58cded34823fca1f2afc/faster-whisper-colab-runner.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "collapsed": true,
        "id": "vO9qJvQI9iZS",
        "outputId": "59bd4943-7790-475e-bcbd-63f399ffcbc7",
        "cellView": "form"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting faster-whisper\n",
            "  Downloading faster_whisper-1.0.2-py3-none-any.whl (1.5 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting yt-dlp\n",
            "  Downloading yt_dlp-2024.5.27-py3-none-any.whl (3.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m36.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting av<13,>=11.0 (from faster-whisper)\n",
            "  Downloading av-12.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.3/34.3 MB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting ctranslate2<5,>=4.0 (from faster-whisper)\n",
            "  Downloading ctranslate2-4.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (192.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m192.3/192.3 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: huggingface-hub>=0.13 in /usr/local/lib/python3.10/dist-packages (from faster-whisper) (0.23.4)\n",
            "Requirement already satisfied: tokenizers<1,>=0.13 in /usr/local/lib/python3.10/dist-packages (from faster-whisper) (0.19.1)\n",
            "Collecting onnxruntime<2,>=1.14 (from faster-whisper)\n",
            "  Downloading onnxruntime-1.18.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m39.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting brotli (from yt-dlp)\n",
            "  Downloading Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2024.6.2)\n",
            "Collecting mutagen (from yt-dlp)\n",
            "  Downloading mutagen-1.47.0-py3-none-any.whl (194 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.4/194.4 kB\u001b[0m \u001b[31m27.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting pycryptodomex (from yt-dlp)\n",
            "  Downloading pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: requests<3,>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2.31.0)\n",
            "Requirement already satisfied: urllib3<3,>=1.26.17 in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2.0.7)\n",
            "Collecting websockets>=12.0 (from yt-dlp)\n",
            "  Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (67.7.2)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (1.25.2)\n",
            "Requirement already satisfied: pyyaml<7,>=5.3 in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (6.0.1)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (3.15.1)\n",
            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (2023.6.0)\n",
            "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (24.1)\n",
            "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (4.66.4)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (4.12.2)\n",
            "Collecting coloredlogs (from onnxruntime<2,>=1.14->faster-whisper)\n",
            "  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (24.3.25)\n",
            "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (3.20.3)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (1.12.1)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.31.0->yt-dlp) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.31.0->yt-dlp) (3.7)\n",
            "Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime<2,>=1.14->faster-whisper)\n",
            "  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime<2,>=1.14->faster-whisper) (1.3.0)\n",
            "Installing collected packages: brotli, websockets, pycryptodomex, mutagen, humanfriendly, ctranslate2, av, yt-dlp, coloredlogs, onnxruntime, faster-whisper\n",
            "Successfully installed av-12.1.0 brotli-1.1.0 coloredlogs-15.0.1 ctranslate2-4.3.1 faster-whisper-1.0.2 humanfriendly-10.0 mutagen-1.47.0 onnxruntime-1.18.0 pycryptodomex-3.20.0 websockets-12.0 yt-dlp-2024.5.27\n"
          ]
        }
      ],
      "source": [
        "#@title Install dependencies\n",
        "!pip install faster-whisper yt-dlp"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Transcribe audio to .srt file\n",
        "\n",
        "from ipywidgets import FloatProgress\n",
        "from datetime import datetime\n",
        "import os\n",
        "from faster_whisper import WhisperModel\n",
        "\n",
        "model = WhisperModel(\"medium\", device=\"cuda\", compute_type=\"float16\")\n",
        "\n",
        "srtOutputDir = \"outputs\"\n",
        "os.makedirs(srtOutputDir, exist_ok=True)\n",
        "\n",
        "def timeformat_srt(time):\n",
        "    hours = time // 3600\n",
        "    minutes = (time - hours * 3600) // 60\n",
        "    seconds = time - hours * 3600 - minutes * 60\n",
        "    milliseconds = (time - int(time)) * 1000\n",
        "    return f\"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(milliseconds):03d}\"\n",
        "\n",
        "def transcribe(filename, language=None):\n",
        "    outputs = []\n",
        "    fileBasename = os.path.basename(filename)\n",
        "\n",
        "    segments, info = model.transcribe(filename, beam_size=5, language=language)\n",
        "\n",
        "    pbar = FloatProgress(min=0, max=info.duration)\n",
        "    display(pbar)\n",
        "\n",
        "    srtOutputChunks = []\n",
        "    for i, segment in enumerate(segments):\n",
        "        srtOutputChunks.append(\n",
        "            f\"{i + 2}\\n{timeformat_srt(segment.start)} --> {timeformat_srt(segment.end)}\\n{segment.text.strip()}\\n\"\n",
        "        )\n",
        "        print(f\"[{timeformat_srt(segment.start)}] {segment.text.strip()}\")\n",
        "        if segment.end is not None:\n",
        "            pbar.value = segment.end\n",
        "\n",
        "    pbar.value = info.duration\n",
        "\n",
        "    base, _ = os.path.splitext(fileBasename)\n",
        "    now = datetime.now()\n",
        "    srtFilename = '%s_%s.srt' % (base, now.strftime(f\"%Y%m%d_%H%S%S\"))\n",
        "    srtPath = os.path.join(srtOutputDir, srtFilename)\n",
        "    srtOutput = \"\\n\".join(srtOutputChunks)\n",
        "    with open(srtPath, 'w') as wf:\n",
        "        wf.write(srtOutput)\n",
        "\n",
        "    return srtPath, srtOutput\n"
      ],
      "metadata": {
        "id": "GJkXP5i6-Zy-",
        "cellView": "form"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Get audio with yt-dlp\n",
        "\n",
        "import yt_dlp\n",
        "\n",
        "def getYoutubeAudio(url):\n",
        "    final_filename = None\n",
        "\n",
        "    def yt_dlp_monitor(d):\n",
        "        nonlocal final_filename\n",
        "        if d['status'] == 'finished':\n",
        "            final_filename  = d.get('info_dict').get('_filename')\n",
        "\n",
        "    ydl_opts = {\n",
        "        'format': 'm4a/bestaudio/best',\n",
        "        'progress_hooks': [yt_dlp_monitor],\n",
        "        'postprocessors': [{  # Extract audio using ffmpeg\n",
        "            'key': 'FFmpegExtractAudio',\n",
        "            'preferredcodec': 'm4a',\n",
        "        }]\n",
        "    }\n",
        "\n",
        "    with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
        "        ydl.download([url])\n",
        "\n",
        "    return final_filename\n"
      ],
      "metadata": {
        "id": "jj5kP8q8w8k0",
        "cellView": "form"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from google.colab import files\n",
        "\n",
        "url = 'https://www.youtube.com/watch?v=jNQXAC9IVRw' # @param {type:\"string\"}\n",
        "fname = getYoutubeAudio(url)\n",
        "srtPath, srtOutput = transcribe(fname)\n",
        "files.download(srtPath)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 281,
          "referenced_widgets": [
            "1176f9c23d5a475b894c01998ce80114",
            "17fcf507e79542f197706d006cc70cfc",
            "b2845afe89344d0da49b349a2b95d355"
          ]
        },
        "id": "jeeA9cGnAlSD",
        "outputId": "e2bac65b-967b-42f3-fa96-b375f24c6fe4"
      },
      "execution_count": 36,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[youtube] Extracting URL: https://www.youtube.com/watch?v=jNQXAC9IVRw\n",
            "[youtube] jNQXAC9IVRw: Downloading webpage\n",
            "[youtube] jNQXAC9IVRw: Downloading ios player API JSON\n",
            "[youtube] jNQXAC9IVRw: Downloading m3u8 information\n",
            "[info] jNQXAC9IVRw: Downloading 1 format(s): 140\n",
            "[download] Destination: Me at the zoo [jNQXAC9IVRw].m4a\n",
            "[download] 100% of  301.95KiB in 00:00:00 at 7.80MiB/s   \n",
            "[FixupM4a] Correcting container of \"Me at the zoo [jNQXAC9IVRw].m4a\"\n",
            "[ExtractAudio] Not converting audio Me at the zoo [jNQXAC9IVRw].m4a; file is already in target format m4a\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "FloatProgress(value=0.0, max=19.0635625)"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "1176f9c23d5a475b894c01998ce80114"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[00:00:00,000] Alright, so here we are in front of the elephants.\n",
            "[00:00:05,000] The cool thing about these guys is that they have really, really, really long trunks.\n",
            "[00:00:13,000] And that's cool.\n",
            "[00:00:16,000] And that's pretty much all there is to say.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.Javascript object>"
            ],
            "application/javascript": [
              "\n",
              "    async function download(id, filename, size) {\n",
              "      if (!google.colab.kernel.accessAllowed) {\n",
              "        return;\n",
              "      }\n",
              "      const div = document.createElement('div');\n",
              "      const label = document.createElement('label');\n",
              "      label.textContent = `Downloading \"${filename}\": `;\n",
              "      div.appendChild(label);\n",
              "      const progress = document.createElement('progress');\n",
              "      progress.max = size;\n",
              "      div.appendChild(progress);\n",
              "      document.body.appendChild(div);\n",
              "\n",
              "      const buffers = [];\n",
              "      let downloaded = 0;\n",
              "\n",
              "      const channel = await google.colab.kernel.comms.open(id);\n",
              "      // Send a message to notify the kernel that we're ready.\n",
              "      channel.send({})\n",
              "\n",
              "      for await (const message of channel.messages) {\n",
              "        // Send a message to notify the kernel that we're ready.\n",
              "        channel.send({})\n",
              "        if (message.buffers) {\n",
              "          for (const buffer of message.buffers) {\n",
              "            buffers.push(buffer);\n",
              "            downloaded += buffer.byteLength;\n",
              "            progress.value = downloaded;\n",
              "          }\n",
              "        }\n",
              "      }\n",
              "      const blob = new Blob(buffers, {type: 'application/binary'});\n",
              "      const a = document.createElement('a');\n",
              "      a.href = window.URL.createObjectURL(blob);\n",
              "      a.download = filename;\n",
              "      div.appendChild(a);\n",
              "      a.click();\n",
              "      div.remove();\n",
              "    }\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.Javascript object>"
            ],
            "application/javascript": [
              "download(\"download_09704e79-c4e4-4ade-a34c-b46457c6d60a\", \"Me at the zoo [jNQXAC9IVRw]_20240624_104242.srt\", 329)"
            ]
          },
          "metadata": {}
        }
      ]
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"provenance": [],
	"gpuType": "T4",
	"authorship_tag": "ABX9TyM4vtPG2FtEpuoNrjcGGBMr",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	},
	"accelerator": "GPU",
	"widgets": {
	"application/vnd.jupyter.widget-state+json": {
	"1176f9c23d5a475b894c01998ce80114": {
	"model_module": "@jupyter-widgets/controls",
	"model_name": "FloatProgressModel",
	"model_module_version": "1.5.0",
	"state": {
	"_dom_classes": [],
	"_model_module": "@jupyter-widgets/controls",
	"_model_module_version": "1.5.0",
	"_model_name": "FloatProgressModel",
	"_view_count": null,
	"_view_module": "@jupyter-widgets/controls",
	"_view_module_version": "1.5.0",
	"_view_name": "ProgressView",
	"bar_style": "",
	"description": "",
	"description_tooltip": null,
	"layout": "IPY_MODEL_17fcf507e79542f197706d006cc70cfc",
	"max": 19.0635625,
	"min": 0,
	"orientation": "horizontal",
	"style": "IPY_MODEL_b2845afe89344d0da49b349a2b95d355",
	"value": 19.0635625
	}
	},
	"17fcf507e79542f197706d006cc70cfc": {
	"model_module": "@jupyter-widgets/base",
	"model_name": "LayoutModel",
	"model_module_version": "1.2.0",
	"state": {
	"_model_module": "@jupyter-widgets/base",
	"_model_module_version": "1.2.0",
	"_model_name": "LayoutModel",
	"_view_count": null,
	"_view_module": "@jupyter-widgets/base",
	"_view_module_version": "1.2.0",
	"_view_name": "LayoutView",
	"align_content": null,
	"align_items": null,
	"align_self": null,
	"border": null,
	"bottom": null,
	"display": null,
	"flex": null,
	"flex_flow": null,
	"grid_area": null,
	"grid_auto_columns": null,
	"grid_auto_flow": null,
	"grid_auto_rows": null,
	"grid_column": null,
	"grid_gap": null,
	"grid_row": null,
	"grid_template_areas": null,
	"grid_template_columns": null,
	"grid_template_rows": null,
	"height": null,
	"justify_content": null,
	"justify_items": null,
	"left": null,
	"margin": null,
	"max_height": null,
	"max_width": null,
	"min_height": null,
	"min_width": null,
	"object_fit": null,
	"object_position": null,
	"order": null,
	"overflow": null,
	"overflow_x": null,
	"overflow_y": null,
	"padding": null,
	"right": null,
	"top": null,
	"visibility": null,
	"width": null
	}
	},
	"b2845afe89344d0da49b349a2b95d355": {
	"model_module": "@jupyter-widgets/controls",
	"model_name": "ProgressStyleModel",
	"model_module_version": "1.5.0",
	"state": {
	"_model_module": "@jupyter-widgets/controls",
	"_model_module_version": "1.5.0",
	"_model_name": "ProgressStyleModel",
	"_view_count": null,
	"_view_module": "@jupyter-widgets/base",
	"_view_module_version": "1.2.0",
	"_view_name": "StyleView",
	"bar_color": null,
	"description_width": ""
	}
	}
	}
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/phu54321/ca8a957ad41f58cded34823fca1f2afc/faster-whisper-colab-runner.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"collapsed": true,
	"id": "vO9qJvQI9iZS",
	"outputId": "59bd4943-7790-475e-bcbd-63f399ffcbc7",
	"cellView": "form"
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"Collecting faster-whisper\n",
	" Downloading faster_whisper-1.0.2-py3-none-any.whl (1.5 MB)\n",
	"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
	"\u001b[?25hCollecting yt-dlp\n",
	" Downloading yt_dlp-2024.5.27-py3-none-any.whl (3.1 MB)\n",
	"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m36.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
	"\u001b[?25hCollecting av<13,>=11.0 (from faster-whisper)\n",
	" Downloading av-12.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.3 MB)\n",
	"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.3/34.3 MB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
	"\u001b[?25hCollecting ctranslate2<5,>=4.0 (from faster-whisper)\n",
	" Downloading ctranslate2-4.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (192.3 MB)\n",
	"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m192.3/192.3 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
	"\u001b[?25hRequirement already satisfied: huggingface-hub>=0.13 in /usr/local/lib/python3.10/dist-packages (from faster-whisper) (0.23.4)\n",
	"Requirement already satisfied: tokenizers<1,>=0.13 in /usr/local/lib/python3.10/dist-packages (from faster-whisper) (0.19.1)\n",
	"Collecting onnxruntime<2,>=1.14 (from faster-whisper)\n",
	" Downloading onnxruntime-1.18.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)\n",
	"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m39.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
	"\u001b[?25hCollecting brotli (from yt-dlp)\n",
	" Downloading Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.0 MB)\n",
	"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
	"\u001b[?25hRequirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2024.6.2)\n",
	"Collecting mutagen (from yt-dlp)\n",
	" Downloading mutagen-1.47.0-py3-none-any.whl (194 kB)\n",
	"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.4/194.4 kB\u001b[0m \u001b[31m27.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
	"\u001b[?25hCollecting pycryptodomex (from yt-dlp)\n",
	" Downloading pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n",
	"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
	"\u001b[?25hRequirement already satisfied: requests<3,>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2.31.0)\n",
	"Requirement already satisfied: urllib3<3,>=1.26.17 in /usr/local/lib/python3.10/dist-packages (from yt-dlp) (2.0.7)\n",
	"Collecting websockets>=12.0 (from yt-dlp)\n",
	" Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n",
	"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
	"\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (67.7.2)\n",
	"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (1.25.2)\n",
	"Requirement already satisfied: pyyaml<7,>=5.3 in /usr/local/lib/python3.10/dist-packages (from ctranslate2<5,>=4.0->faster-whisper) (6.0.1)\n",
	"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (3.15.1)\n",
	"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (2023.6.0)\n",
	"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (24.1)\n",
	"Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (4.66.4)\n",
	"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.13->faster-whisper) (4.12.2)\n",
	"Collecting coloredlogs (from onnxruntime<2,>=1.14->faster-whisper)\n",
	" Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
	"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
	"\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (24.3.25)\n",
	"Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (3.20.3)\n",
	"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime<2,>=1.14->faster-whisper) (1.12.1)\n",
	"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.31.0->yt-dlp) (3.3.2)\n",
	"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.31.0->yt-dlp) (3.7)\n",
	"Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime<2,>=1.14->faster-whisper)\n",
	" Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
	"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
	"\u001b[?25hRequirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime<2,>=1.14->faster-whisper) (1.3.0)\n",
	"Installing collected packages: brotli, websockets, pycryptodomex, mutagen, humanfriendly, ctranslate2, av, yt-dlp, coloredlogs, onnxruntime, faster-whisper\n",
	"Successfully installed av-12.1.0 brotli-1.1.0 coloredlogs-15.0.1 ctranslate2-4.3.1 faster-whisper-1.0.2 humanfriendly-10.0 mutagen-1.47.0 onnxruntime-1.18.0 pycryptodomex-3.20.0 websockets-12.0 yt-dlp-2024.5.27\n"
	]
	}
	],
	"source": [
	"#@title Install dependencies\n",
	"!pip install faster-whisper yt-dlp"
	]
	},
	{
	"cell_type": "code",
	"source": [
	"#@title Transcribe audio to .srt file\n",
	"\n",
	"from ipywidgets import FloatProgress\n",
	"from datetime import datetime\n",
	"import os\n",
	"from faster_whisper import WhisperModel\n",
	"\n",
	"model = WhisperModel(\"medium\", device=\"cuda\", compute_type=\"float16\")\n",
	"\n",
	"srtOutputDir = \"outputs\"\n",
	"os.makedirs(srtOutputDir, exist_ok=True)\n",
	"\n",
	"def timeformat_srt(time):\n",
	" hours = time // 3600\n",
	" minutes = (time - hours * 3600) // 60\n",
	" seconds = time - hours * 3600 - minutes * 60\n",
	" milliseconds = (time - int(time)) * 1000\n",
	" return f\"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(milliseconds):03d}\"\n",
	"\n",
	"def transcribe(filename, language=None):\n",
	"    \"\"\"Transcribe an audio file and save the result as an .srt subtitle file.\n",
	"\n",
	"    A FloatProgress widget is displayed and advanced to each segment's end\n",
	"    time, and every segment is printed while iterating.\n",
	"\n",
	"    Args:\n",
	"        filename: Path of the audio file handed to model.transcribe.\n",
	"        language: Passed through to model.transcribe; defaults to None.\n",
	"\n",
	"    Returns:\n",
	"        (srtPath, srtOutput): path of the written .srt file and its full text.\n",
	"    \"\"\"\n",
	"    fileBasename = os.path.basename(filename)\n",
	"\n",
	"    segments, info = model.transcribe(filename, beam_size=5, language=language)\n",
	"\n",
	"    pbar = FloatProgress(min=0, max=info.duration)\n",
	"    display(pbar)\n",
	"\n",
	"    srtOutputChunks = []\n",
	"    # SRT cue numbers are 1-based.\n",
	"    for index, segment in enumerate(segments, start=1):\n",
	"        text = segment.text.strip()\n",
	"        start = timeformat_srt(segment.start)\n",
	"        end = timeformat_srt(segment.end)\n",
	"        srtOutputChunks.append(f\"{index}\\n{start} --> {end}\\n{text}\\n\")\n",
	"        print(f\"[{start}] {text}\")\n",
	"        if segment.end is not None:\n",
	"            pbar.value = segment.end\n",
	"\n",
	"    # Make sure the bar ends full even if the last segment stops early.\n",
	"    pbar.value = info.duration\n",
	"\n",
	"    base, _ = os.path.splitext(fileBasename)\n",
	"    # Timestamp suffix is date_hour-minute-second (%M is minutes, %S seconds).\n",
	"    timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
	"    srtFilename = '%s_%s.srt' % (base, timestamp)\n",
	"    srtPath = os.path.join(srtOutputDir, srtFilename)\n",
	"    # Each chunk already ends with a newline; joining with another one gives\n",
	"    # the blank line that separates cues.\n",
	"    srtOutput = \"\\n\".join(srtOutputChunks)\n",
	"    # Write UTF-8 explicitly so non-ASCII transcripts do not depend on the locale.\n",
	"    with open(srtPath, 'w', encoding='utf-8') as wf:\n",
	"        wf.write(srtOutput)\n",
	"\n",
	"    return srtPath, srtOutput\n"
	],
	"metadata": {
	"id": "GJkXP5i6-Zy-",
	"cellView": "form"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"#@title Get audio with yt-dlp\n",
	"\n",
	"import yt_dlp\n",
	"\n",
	"def getYoutubeAudio(url):\n",
	"    \"\"\"Download the audio track of `url` with yt-dlp and return its file name.\n",
	"\n",
	"    Args:\n",
	"        url: URL passed to yt-dlp's download().\n",
	"\n",
	"    Returns:\n",
	"        The file name reported by the download progress hook, or None if no\n",
	"        download ever reported the 'finished' status.\n",
	"    \"\"\"\n",
	"    final_filename = None\n",
	"\n",
	"    def yt_dlp_monitor(d):\n",
	"        # Progress hook: remember the file name once a download has finished.\n",
	"        nonlocal final_filename\n",
	"        if d.get('status') != 'finished':\n",
	"            return\n",
	"        # 'info_dict' may be absent; do not chain .get() on None.\n",
	"        info = d.get('info_dict') or {}\n",
	"        # Keep the '_filename' value when it exists and fall back to the\n",
	"        # hook's own 'filename' entry otherwise.\n",
	"        # NOTE(review): if the postprocessor has to transcode, the final file\n",
	"        # may carry a different extension than the name captured here - confirm.\n",
	"        final_filename = info.get('_filename') or d.get('filename')\n",
	"\n",
	"    ydl_opts = {\n",
	"        'format': 'm4a/bestaudio/best',\n",
	"        'progress_hooks': [yt_dlp_monitor],\n",
	"        'postprocessors': [{  # Extract audio using ffmpeg\n",
	"            'key': 'FFmpegExtractAudio',\n",
	"            'preferredcodec': 'm4a',\n",
	"        }]\n",
	"    }\n",
	"\n",
	"    with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
	"        ydl.download([url])\n",
	"\n",
	"    return final_filename\n"
	],
	"metadata": {
	"id": "jj5kP8q8w8k0",
	"cellView": "form"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"from google.colab import files\n",
	"\n",
	"# Video to transcribe; editable through the Colab form field.\n",
	"url = 'https://www.youtube.com/watch?v=jNQXAC9IVRw' # @param {type:\"string\"}\n",
	"# Download the audio, transcribe it to an .srt file, then offer that file as a browser download.\n",
	"fname = getYoutubeAudio(url)\n",
	"srtPath, srtOutput = transcribe(fname)\n",
	"files.download(srtPath)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 281,
	"referenced_widgets": [
	"1176f9c23d5a475b894c01998ce80114",
	"17fcf507e79542f197706d006cc70cfc",
	"b2845afe89344d0da49b349a2b95d355"
	]
	},
	"id": "jeeA9cGnAlSD",
	"outputId": "e2bac65b-967b-42f3-fa96-b375f24c6fe4"
	},
	"execution_count": 36,
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"[youtube] Extracting URL: https://www.youtube.com/watch?v=jNQXAC9IVRw\n",
	"[youtube] jNQXAC9IVRw: Downloading webpage\n",
	"[youtube] jNQXAC9IVRw: Downloading ios player API JSON\n",
	"[youtube] jNQXAC9IVRw: Downloading m3u8 information\n",
	"[info] jNQXAC9IVRw: Downloading 1 format(s): 140\n",
	"[download] Destination: Me at the zoo [jNQXAC9IVRw].m4a\n",
	"[download] 100% of 301.95KiB in 00:00:00 at 7.80MiB/s \n",
	"[FixupM4a] Correcting container of \"Me at the zoo [jNQXAC9IVRw].m4a\"\n",
	"[ExtractAudio] Not converting audio Me at the zoo [jNQXAC9IVRw].m4a; file is already in target format m4a\n"
	]
	},
	{
	"output_type": "display_data",
	"data": {
	"text/plain": [
	"FloatProgress(value=0.0, max=19.0635625)"
	],
	"application/vnd.jupyter.widget-view+json": {
	"version_major": 2,
	"version_minor": 0,
	"model_id": "1176f9c23d5a475b894c01998ce80114"
	}
	},
	"metadata": {}
	},
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"[00:00:00,000] Alright, so here we are in front of the elephants.\n",
	"[00:00:05,000] The cool thing about these guys is that they have really, really, really long trunks.\n",
	"[00:00:13,000] And that's cool.\n",
	"[00:00:16,000] And that's pretty much all there is to say.\n"
	]
	},
	{
	"output_type": "display_data",
	"data": {
	"text/plain": [
	"<IPython.core.display.Javascript object>"
	],
	"application/javascript": [
	"\n",
	" async function download(id, filename, size) {\n",
	" if (!google.colab.kernel.accessAllowed) {\n",
	" return;\n",
	" }\n",
	" const div = document.createElement('div');\n",
	" const label = document.createElement('label');\n",
	" label.textContent = `Downloading \"${filename}\": `;\n",
	" div.appendChild(label);\n",
	" const progress = document.createElement('progress');\n",
	" progress.max = size;\n",
	" div.appendChild(progress);\n",
	" document.body.appendChild(div);\n",
	"\n",
	" const buffers = [];\n",
	" let downloaded = 0;\n",
	"\n",
	" const channel = await google.colab.kernel.comms.open(id);\n",
	" // Send a message to notify the kernel that we're ready.\n",
	" channel.send({})\n",
	"\n",
	" for await (const message of channel.messages) {\n",
	" // Send a message to notify the kernel that we're ready.\n",
	" channel.send({})\n",
	" if (message.buffers) {\n",
	" for (const buffer of message.buffers) {\n",
	" buffers.push(buffer);\n",
	" downloaded += buffer.byteLength;\n",
	" progress.value = downloaded;\n",
	" }\n",
	" }\n",
	" }\n",
	" const blob = new Blob(buffers, {type: 'application/binary'});\n",
	" const a = document.createElement('a');\n",
	" a.href = window.URL.createObjectURL(blob);\n",
	" a.download = filename;\n",
	" div.appendChild(a);\n",
	" a.click();\n",
	" div.remove();\n",
	" }\n",
	" "
	]
	},
	"metadata": {}
	},
	{
	"output_type": "display_data",
	"data": {
	"text/plain": [
	"<IPython.core.display.Javascript object>"
	],
	"application/javascript": [
	"download(\"download_09704e79-c4e4-4ade-a34c-b46457c6d60a\", \"Me at the zoo [jNQXAC9IVRw]_20240624_104242.srt\", 329)"
	]
	},
	"metadata": {}
	}
	]
	}
	]
	}