Skip to content

Instantly share code, notes, and snippets.

@anubhavshrimal
Last active October 6, 2023 19:03
Show Gist options
  • Save anubhavshrimal/ed9970c94c684a33f863e93d0c8e2c7b to your computer and use it in GitHub Desktop.
Save anubhavshrimal/ed9970c94c684a33f863e93d0c8e2c7b to your computer and use it in GitHub Desktop.
openai_whisper_audio_transcription.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/anubhavshrimal/ed9970c94c684a33f863e93d0c8e2c7b/openai_whisper.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aLg1fy9zuCqe"
},
"outputs": [],
"source": [
"# %pip installs into the environment of the running kernel.\n",
"# NOTE(review): this tracks the latest commit, unpinned. The import cell uses\n",
"# `from whisper.utils import write_vtt`; confirm the installed revision still provides it.\n",
"%pip install git+https://github.com/openai/whisper.git"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-gw8Rc-xuOe0"
},
"outputs": [],
"source": [
"# -y answers the install confirmation automatically; it cannot be answered\n",
"# interactively from a notebook cell. apt-get is the script-friendly front end.\n",
"!sudo apt-get update && sudo apt-get install -y ffmpeg"
]
},
{
"cell_type": "code",
"source": [
"# Print the NVIDIA driver/GPU status to check that the runtime has a GPU attached.\n",
"!nvidia-smi"
],
"metadata": {
"id": "Mx7LvFjaB6YE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import whisper\n",
"from whisper.utils import write_vtt\n",
"import sys\n",
"import subprocess\n",
"\n",
"import os"
],
"metadata": {
"id": "uf1ShlWmDYqX"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def translate(audio_file, model, covert_to_english=True):\n",
"    \"\"\"Run `model.transcribe` on an audio file and return its result.\n",
"\n",
"    Args:\n",
"        audio_file: Audio input forwarded unchanged to `model.transcribe`.\n",
"        model: Object exposing `transcribe(audio, **options)`.\n",
"        covert_to_english: If true, request task 'translate'; otherwise\n",
"            request task 'transcribe'. The original spelling of the\n",
"            parameter name is kept so keyword callers keep working.\n",
"\n",
"    Returns:\n",
"        The value returned by `model.transcribe`.\n",
"    \"\"\"\n",
"    task = \"translate\" if covert_to_english else \"transcribe\"\n",
"    return model.transcribe(audio_file, task=task, beam_size=5, best_of=5)"
],
"metadata": {
"id": "NCEDWiHmfOuP"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def write_subtitles(subtitle_output_path, result):\n",
"    \"\"\"Write the segments of a transcription result to a subtitle file.\n",
"\n",
"    Args:\n",
"        subtitle_output_path: Destination path of the subtitle file.\n",
"        result: Mapping with a 'segments' entry, which is handed to `write_vtt`.\n",
"    \"\"\"\n",
"    # Explicit UTF-8 so non-ASCII subtitle text does not depend on the locale default.\n",
"    with open(subtitle_output_path, \"w\", encoding=\"utf-8\") as vtt:\n",
"        write_vtt(result[\"segments\"], file=vtt)\n",
"    print('Subtitles written at', subtitle_output_path)"
],
"metadata": {
"id": "MhV3W_7Kep4V"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Load the Whisper model named \"large\"; this object is passed to translate() in later cells.\n",
"model = whisper.load_model(\"large\")"
],
"metadata": {
"id": "6bWh1Ap6hdDJ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Convert Audio file to Subtitles"
],
"metadata": {
"id": "RJ0jnOijm-br"
}
},
{
"cell_type": "code",
"source": [
"# Directory the audio is read from, and directory the subtitles are written to.\n",
"input_dir = '/content/'\n",
"output_dir = '/content/'\n",
"audio_file = 'audio_file.wav'\n",
"# splitext removes only the final extension, so a name such as 'talk.part1.wav'\n",
"# keeps its full stem ('talk.part1') instead of being cut at the first dot.\n",
"audio_path = os.path.splitext(audio_file)[0]\n",
"subtitle = audio_path + \".vtt\""
],
"metadata": {
"id": "HADqXLwrhgyR"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Run the model on the configured audio file, requesting the 'translate' task.\n",
"result = translate(\n",
"    audio_file=os.path.join(input_dir, audio_file),\n",
"    model=model,\n",
"    covert_to_english=True,\n",
")"
],
"metadata": {
"id": "oLLGwZLVfyet"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Show the 'text' entry of the result returned by translate().\n",
"result['text']"
],
"metadata": {
"id": "TFSNFhpfhS93"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Write the result's segments to the .vtt file named by `subtitle`, inside output_dir.\n",
"write_subtitles(os.path.join(output_dir, subtitle), result)"
],
"metadata": {
"id": "Ug1SbxnPfii8"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Burn subtitles into a Video file"
],
"metadata": {
"id": "kdBVl6ZKj9aD"
}
},
{
"cell_type": "code",
"source": [
"# Video to subtitle; the cells below pass this name to video2mp3() and to ffmpeg.\n",
"video_file_name = 'demo.mp4'"
],
"metadata": {
"id": "ywV6hCjgnG7Q"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def video2mp3(video_file, output_ext=\"mp3\"):\n",
"    \"\"\"Convert a media file with ffmpeg, writing the output next to the input.\n",
"\n",
"    Args:\n",
"        video_file: Path of the input file, passed to ffmpeg via `-i`.\n",
"        output_ext: Extension (without the dot) of the file to produce.\n",
"\n",
"    Returns:\n",
"        The output path: `video_file` with its extension replaced by `output_ext`.\n",
"\n",
"    Raises:\n",
"        RuntimeError: If ffmpeg exits with a non-zero status.\n",
"    \"\"\"\n",
"    filename, _ = os.path.splitext(video_file)\n",
"    output_file = f\"{filename}.{output_ext}\"\n",
"    completed = subprocess.run(\n",
"        [\"ffmpeg\", \"-y\", \"-i\", video_file, output_file],\n",
"        stdout=subprocess.DEVNULL,\n",
"        stderr=subprocess.PIPE,\n",
"    )\n",
"    if completed.returncode != 0:\n",
"        # Fail loudly with the tail of ffmpeg's log rather than returning a path\n",
"        # to a file that was never written.\n",
"        log_tail = completed.stderr.decode(\"utf-8\", errors=\"replace\")[-500:]\n",
"        raise RuntimeError(f\"ffmpeg failed converting {video_file!r}: {log_tail}\")\n",
"    return output_file"
],
"metadata": {
"id": "_jWVsn5ZnDVJ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"audio_file = video2mp3(video_file_name)\n",
"# splitext removes only the final extension, so names with extra dots keep their stem.\n",
"audio_path = os.path.splitext(audio_file)[0]\n",
"subtitle = audio_path + \".vtt\"\n",
"\n",
"# video2mp3 returns the path it wrote, so pass it as-is; re-joining it with\n",
"# output_dir is only correct when the working directory happens to be output_dir.\n",
"result = translate(audio_file, model, covert_to_english=True)"
],
"metadata": {
"id": "PJbGTU1onOYG"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Print the 'text' entry of the result produced from the video's audio.\n",
"print(result[\"text\"])"
],
"metadata": {
"id": "IV_13sadoDXm"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Write the result's segments to the .vtt file named by `subtitle`, inside output_dir.\n",
"write_subtitles(os.path.join(output_dir, subtitle), result)"
],
"metadata": {
"id": "SX2t-Y9OoIGz"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"output_video = audio_path + \"_subtitled.mp4\"\n",
"# The subtitle file is written under output_dir, so read it from there\n",
"# instead of relying on the working directory.\n",
"subtitle_path = os.path.join(output_dir, subtitle)\n",
"\n",
"# An argument list (no shell) keeps file names with spaces or shell metacharacters intact;\n",
"# -y overwrites the output of a previous run instead of waiting on a prompt;\n",
"# check=True raises if ffmpeg fails instead of continuing silently.\n",
"# NOTE(review): the subtitles filter has its own escaping rules for characters such as\n",
"# ':' and ',' in the path; confirm if such paths are expected.\n",
"subprocess.run(\n",
"    [\"ffmpeg\", \"-y\", \"-i\", video_file_name, \"-vf\", f\"subtitles={subtitle_path}\", output_video],\n",
"    check=True,\n",
")"
],
"metadata": {
"id": "mFonkEjdoT_z"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "O63aLLPXpd_M"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"provenance": [],
"collapsed_sections": [],
"name": "openai_whisper_audio_transcription.ipynb",
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment