anubhavshrimal · October 6, 2023 19:03
diff --git a/openai_whisper.ipynb b/openai_whisper.ipynb
 {
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/anubhavshrimal/ed9970c94c684a33f863e93d0c8e2c7b/openai_whisper.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "aLg1fy9zuCqe"
      },
      "outputs": [],
      "source": [
        "# %pip installs into the environment of the running kernel\n",
        "%pip install git+https://github.com/openai/whisper.git"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "-gw8Rc-xuOe0"
      },
      "outputs": [],
      "source": [
        "# -y answers the confirmation prompt, which cannot be answered from a notebook cell\n",
        "!sudo apt-get update && sudo apt-get install -y ffmpeg"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!nvidia-smi"
      ],
      "metadata": {
        "id": "Mx7LvFjaB6YE"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import whisper\n",
        "from whisper.utils import write_vtt\n",
        "import sys\n",
        "import subprocess\n",
        "\n",
        "import os"
      ],
      "metadata": {
        "id": "uf1ShlWmDYqX"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def translate(audio_file, model, covert_to_english=True):\n",
        "    \"\"\"Run the model on an audio file and return its result.\n",
        "\n",
        "    Args:\n",
        "        audio_file: Path of the audio file handed to ``model.transcribe``.\n",
        "        model: Object exposing ``transcribe(audio, **options)``.\n",
        "        covert_to_english: When truthy the task is \"translate\", otherwise\n",
        "            it is \"transcribe\".\n",
        "\n",
        "    Returns:\n",
        "        Whatever ``model.transcribe`` returns.\n",
        "    \"\"\"\n",
        "    task = \"translate\" if covert_to_english else \"transcribe\"\n",
        "    decode_options = {\"task\": task, \"beam_size\": 5, \"best_of\": 5}\n",
        "    return model.transcribe(audio_file, **decode_options)"
      ],
      "metadata": {
        "id": "NCEDWiHmfOuP"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def write_subtitles(subtitle_output_path, result):\n",
        "    \"\"\"Write the segments of a transcription result to a subtitle file.\n",
        "\n",
        "    Args:\n",
        "        subtitle_output_path: Destination path of the subtitle file.\n",
        "        result: Mapping with a \"segments\" entry, e.g. the value returned by ``translate``.\n",
        "    \"\"\"\n",
        "    # NOTE(review): write_vtt comes from whisper.utils (imports cell); newer whisper\n",
        "    # releases may no longer export it -- confirm against the installed version.\n",
        "    # Explicit UTF-8 so non-ASCII text does not depend on the platform default encoding.\n",
        "    with open(subtitle_output_path, \"w\", encoding=\"utf-8\") as vtt:\n",
        "        write_vtt(result[\"segments\"], file=vtt)\n",
        "    print('Subtitles written at', subtitle_output_path)"
      ],
      "metadata": {
        "id": "MhV3W_7Kep4V"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "model = whisper.load_model(\"large\")"
      ],
      "metadata": {
        "id": "6bWh1Ap6hdDJ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Convert Audio file to Subtitles"
      ],
      "metadata": {
        "id": "RJ0jnOijm-br"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "input_dir = '/content/'\n",
        "output_dir = '/content/'\n",
        "audio_file = 'audio_file.wav'\n",
        "# splitext strips only the final extension, so dotted names such as 'talk.v2.wav' keep their full stem\n",
        "audio_path = os.path.splitext(audio_file)[0]\n",
        "subtitle = audio_path + \".vtt\""
      ],
      "metadata": {
        "id": "HADqXLwrhgyR"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "result = translate(os.path.join(input_dir, audio_file), \n",
        "                   model, \n",
        "                   covert_to_english=True)"
      ],
      "metadata": {
        "id": "oLLGwZLVfyet"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "result['text']"
      ],
      "metadata": {
        "id": "TFSNFhpfhS93"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "write_subtitles(os.path.join(output_dir, subtitle), result)"
      ],
      "metadata": {
        "id": "Ug1SbxnPfii8"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Burn subtitles into a Video file"
      ],
      "metadata": {
        "id": "kdBVl6ZKj9aD"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "video_file_name = 'demo.mp4'"
      ],
      "metadata": {
        "id": "ywV6hCjgnG7Q"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def video2mp3(video_file, output_ext=\"mp3\"):\n",
        "    \"\"\"Extract the audio of a video into a sibling file by invoking ffmpeg.\n",
        "\n",
        "    Args:\n",
        "        video_file: Path of the input video, passed to ffmpeg via ``-i``.\n",
        "        output_ext: Extension (without the dot) of the file to create.\n",
        "\n",
        "    Returns:\n",
        "        ``video_file`` with its extension replaced by ``output_ext``.\n",
        "\n",
        "    Raises:\n",
        "        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.\n",
        "    \"\"\"\n",
        "    stem, _ = os.path.splitext(video_file)\n",
        "    audio_file = f\"{stem}.{output_ext}\"\n",
        "    # check=True surfaces conversion failures here instead of handing a missing file to later cells.\n",
        "    subprocess.run([\"ffmpeg\", \"-y\", \"-i\", video_file, audio_file],\n",
        "                   stdout=subprocess.DEVNULL,\n",
        "                   stderr=subprocess.STDOUT,\n",
        "                   check=True)\n",
        "    return audio_file"
      ],
      "metadata": {
        "id": "_jWVsn5ZnDVJ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "audio_file = video2mp3(video_file_name)\n",
        "# splitext strips only the final extension, so dotted names keep their full stem\n",
        "audio_path = os.path.splitext(audio_file)[0]\n",
        "subtitle = audio_path + \".vtt\"\n",
        "\n",
        "# NOTE(review): video2mp3 writes next to video_file_name; this join assumes that location is output_dir -- confirm\n",
        "result = translate(os.path.join(output_dir, audio_file),\n",
        "                   model,\n",
        "                   covert_to_english=True)"
      ],
      "metadata": {
        "id": "PJbGTU1onOYG"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(result[\"text\"])"
      ],
      "metadata": {
        "id": "IV_13sadoDXm"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "write_subtitles(os.path.join(output_dir, subtitle), result)"
      ],
      "metadata": {
        "id": "SX2t-Y9OoIGz"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "output_video = audio_path + \"_subtitled.mp4\"\n",
        "\n",
        "# Arguments are passed as a list (no shell) so file names with spaces or shell metacharacters are safe.\n",
        "# -y matches video2mp3 and lets the cell be re-run; check=True raises if ffmpeg fails.\n",
        "subprocess.run([\"ffmpeg\", \"-y\", \"-i\", video_file_name, \"-vf\", f\"subtitles={subtitle}\", output_video],\n",
        "               check=True)"
      ],
      "metadata": {
        "id": "mFonkEjdoT_z"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "O63aLLPXpd_M"
      },
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "provenance": [],
      "collapsed_sections": [],
      "name": "openai_whisper_audio_transcription.ipynb",
      "include_colab_link": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/anubhavshrimal/ed9970c94c684a33f863e93d0c8e2c7b/openai_whisper.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "aLg1fy9zuCqe"
	},
	"outputs": [],
	"source": [
	"# %pip installs into the environment of the running kernel\n",
	"%pip install git+https://github.com/openai/whisper.git"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "-gw8Rc-xuOe0"
	},
	"outputs": [],
	"source": [
	"# -y answers the confirmation prompt, which cannot be answered from a notebook cell\n",
	"!sudo apt-get update && sudo apt-get install -y ffmpeg"
	]
	},
	{
	"cell_type": "code",
	"source": [
	"!nvidia-smi"
	],
	"metadata": {
	"id": "Mx7LvFjaB6YE"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"import whisper\n",
	"from whisper.utils import write_vtt\n",
	"import sys\n",
	"import subprocess\n",
	"\n",
	"import os"
	],
	"metadata": {
	"id": "uf1ShlWmDYqX"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"def translate(audio_file, model, covert_to_english=True):\n",
	"    \"\"\"Run the model on an audio file and return its result.\n",
	"\n",
	"    Args:\n",
	"        audio_file: Path of the audio file handed to ``model.transcribe``.\n",
	"        model: Object exposing ``transcribe(audio, **options)``.\n",
	"        covert_to_english: When truthy the task is \"translate\", otherwise\n",
	"            it is \"transcribe\".\n",
	"\n",
	"    Returns:\n",
	"        Whatever ``model.transcribe`` returns.\n",
	"    \"\"\"\n",
	"    task = \"translate\" if covert_to_english else \"transcribe\"\n",
	"    decode_options = {\"task\": task, \"beam_size\": 5, \"best_of\": 5}\n",
	"    return model.transcribe(audio_file, **decode_options)"
	],
	"metadata": {
	"id": "NCEDWiHmfOuP"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"def write_subtitles(subtitle_output_path, result):\n",
	"    \"\"\"Write the segments of a transcription result to a subtitle file.\n",
	"\n",
	"    Args:\n",
	"        subtitle_output_path: Destination path of the subtitle file.\n",
	"        result: Mapping with a \"segments\" entry, e.g. the value returned by ``translate``.\n",
	"    \"\"\"\n",
	"    # NOTE(review): write_vtt comes from whisper.utils (imports cell); newer whisper\n",
	"    # releases may no longer export it -- confirm against the installed version.\n",
	"    # Explicit UTF-8 so non-ASCII text does not depend on the platform default encoding.\n",
	"    with open(subtitle_output_path, \"w\", encoding=\"utf-8\") as vtt:\n",
	"        write_vtt(result[\"segments\"], file=vtt)\n",
	"    print('Subtitles written at', subtitle_output_path)"
	],
	"metadata": {
	"id": "MhV3W_7Kep4V"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"model = whisper.load_model(\"large\")"
	],
	"metadata": {
	"id": "6bWh1Ap6hdDJ"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"## Convert Audio file to Subtitles"
	],
	"metadata": {
	"id": "RJ0jnOijm-br"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"input_dir = '/content/'\n",
	"output_dir = '/content/'\n",
	"audio_file = 'audio_file.wav'\n",
	"# splitext strips only the final extension, so dotted names such as 'talk.v2.wav' keep their full stem\n",
	"audio_path = os.path.splitext(audio_file)[0]\n",
	"subtitle = audio_path + \".vtt\""
	],
	"metadata": {
	"id": "HADqXLwrhgyR"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"result = translate(os.path.join(input_dir, audio_file), \n",
	" model, \n",
	" covert_to_english=True)"
	],
	"metadata": {
	"id": "oLLGwZLVfyet"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"result['text']"
	],
	"metadata": {
	"id": "TFSNFhpfhS93"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"write_subtitles(os.path.join(output_dir, subtitle), result)"
	],
	"metadata": {
	"id": "Ug1SbxnPfii8"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"## Burn subtitles into a Video file"
	],
	"metadata": {
	"id": "kdBVl6ZKj9aD"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"video_file_name = 'demo.mp4'"
	],
	"metadata": {
	"id": "ywV6hCjgnG7Q"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"def video2mp3(video_file, output_ext=\"mp3\"):\n",
	"    \"\"\"Extract the audio of a video into a sibling file by invoking ffmpeg.\n",
	"\n",
	"    Args:\n",
	"        video_file: Path of the input video, passed to ffmpeg via ``-i``.\n",
	"        output_ext: Extension (without the dot) of the file to create.\n",
	"\n",
	"    Returns:\n",
	"        ``video_file`` with its extension replaced by ``output_ext``.\n",
	"\n",
	"    Raises:\n",
	"        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.\n",
	"    \"\"\"\n",
	"    stem, _ = os.path.splitext(video_file)\n",
	"    audio_file = f\"{stem}.{output_ext}\"\n",
	"    # check=True surfaces conversion failures here instead of handing a missing file to later cells.\n",
	"    subprocess.run([\"ffmpeg\", \"-y\", \"-i\", video_file, audio_file],\n",
	"                   stdout=subprocess.DEVNULL,\n",
	"                   stderr=subprocess.STDOUT,\n",
	"                   check=True)\n",
	"    return audio_file"
	],
	"metadata": {
	"id": "_jWVsn5ZnDVJ"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"audio_file = video2mp3(video_file_name)\n",
	"# splitext strips only the final extension, so dotted names keep their full stem\n",
	"audio_path = os.path.splitext(audio_file)[0]\n",
	"subtitle = audio_path + \".vtt\"\n",
	"\n",
	"# NOTE(review): video2mp3 writes next to video_file_name; this join assumes that location is output_dir -- confirm\n",
	"result = translate(os.path.join(output_dir, audio_file),\n",
	"                   model,\n",
	"                   covert_to_english=True)"
	],
	"metadata": {
	"id": "PJbGTU1onOYG"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"print(result[\"text\"])"
	],
	"metadata": {
	"id": "IV_13sadoDXm"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"write_subtitles(os.path.join(output_dir, subtitle), result)"
	],
	"metadata": {
	"id": "SX2t-Y9OoIGz"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"output_video = audio_path + \"_subtitled.mp4\"\n",
	"\n",
	"# Arguments are passed as a list (no shell) so file names with spaces or shell metacharacters are safe.\n",
	"# -y matches video2mp3 and lets the cell be re-run; check=True raises if ffmpeg fails.\n",
	"subprocess.run([\"ffmpeg\", \"-y\", \"-i\", video_file_name, \"-vf\", f\"subtitles={subtitle}\", output_video],\n",
	"               check=True)"
	],
	"metadata": {
	"id": "mFonkEjdoT_z"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [],
	"metadata": {
	"id": "O63aLLPXpd_M"
	},
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"accelerator": "GPU",
	"colab": {
	"provenance": [],
	"collapsed_sections": [],
	"name": "openai_whisper_audio_transcription.ipynb",
	"include_colab_link": true
	},
	"kernelspec": {
	"display_name": "Python 3",
	"name": "python3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}