Skip to content

Instantly share code, notes, and snippets.

@anubhavshrimal
Last active June 2, 2023 04:57
Show Gist options
  • Save anubhavshrimal/b368b2ec2e918d023ac7569b267327c8 to your computer and use it in GitHub Desktop.
Save anubhavshrimal/b368b2ec2e918d023ac7569b267327c8 to your computer and use it in GitHub Desktop.
openai_whisper-youtube-code.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/anubhavshrimal/b368b2ec2e918d023ac7569b267327c8/openai_whisper-youtube-code.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aLg1fy9zuCqe"
},
"outputs": [],
"source": [
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install git+https://github.com/openai/whisper.git"
]
},
{
"cell_type": "code",
"source": [
"# Mount Google Drive at /content/drive. The default input_dir/output_dir set\n",
"# further down point at /content/, so the mount only matters when those are\n",
"# changed to a location under /content/drive.\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"id": "Eu8kbPSX15c-"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-gw8Rc-xuOe0"
},
"outputs": [],
"source": [
"# -y answers the install confirmation prompt so the cell cannot stall or abort\n",
"# when the notebook is executed with Run All.\n",
"!sudo apt-get update && sudo apt-get install -y ffmpeg"
]
},
{
"cell_type": "code",
"source": [
"!nvidia-smi"
],
"metadata": {
"id": "Mx7LvFjaB6YE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# `pip install git+...` does not leave a local whisper/ checkout behind, so the\n",
"# requirements file is only used when it is actually present.\n",
"!if [ -f whisper/requirements.txt ]; then pip install -r whisper/requirements.txt; else echo 'whisper/requirements.txt not found; skipping.'; fi"
],
"metadata": {
"id": "gnUtQ-b4Bi3Z"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import whisper\n",
"from whisper.utils import get_writer\n",
"import sys\n",
"import subprocess\n",
"\n",
"import os"
],
"metadata": {
"id": "uf1ShlWmDYqX"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def translate(audio_file, model, covert_to_english=True):\n",
"    \"\"\"Run ``model.transcribe`` on ``audio_file`` and hand back its result.\n",
"\n",
"    Args:\n",
"        audio_file: Audio path forwarded unchanged to ``model.transcribe``.\n",
"        model: Object exposing ``transcribe(audio, **options)``.\n",
"        covert_to_english: Truthy selects the \"translate\" task, falsy the\n",
"            \"transcribe\" task.\n",
"\n",
"    Returns:\n",
"        Whatever ``model.transcribe`` returns.\n",
"    \"\"\"\n",
"    task = \"translate\" if covert_to_english else \"transcribe\"\n",
"    # Both tasks decode with the same beam_size / best_of settings.\n",
"    return model.transcribe(audio_file, task=task, beam_size=5, best_of=5)"
],
"metadata": {
"id": "NCEDWiHmfOuP"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def write_subtitles(subtitle_output_path, subtitle_fname, result):\n",
"    \"\"\"Write ``result`` as an SRT file and print where it was saved.\n",
"\n",
"    Args:\n",
"        subtitle_output_path: Directory handed to whisper's SRT writer.\n",
"        subtitle_fname: Name the subtitle file is derived from.\n",
"        result: Transcription result, as returned by ``translate``.\n",
"    \"\"\"\n",
"    writer = get_writer(\"srt\", subtitle_output_path)\n",
"    writer(result, subtitle_fname)\n",
"    # Report <dir>/<stem>.srt. Joining '.srt' as a separate path component, as\n",
"    # before, printed '<dir>/<name>/.srt'. The stem is derived the way whisper's\n",
"    # ResultWriter is understood to derive it (basename, extension removed);\n",
"    # NOTE(review): confirm against the installed whisper version.\n",
"    stem = os.path.splitext(os.path.basename(subtitle_fname))[0]\n",
"    print('Subtitles written at', os.path.join(subtitle_output_path, stem + '.srt'))"
],
"metadata": {
"id": "MhV3W_7Kep4V"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"model = whisper.load_model(\"large\")"
],
"metadata": {
"id": "6bWh1Ap6hdDJ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Convert Audio file to Subtitles"
],
"metadata": {
"id": "RJ0jnOijm-br"
}
},
{
"cell_type": "code",
"source": [
"# Locations and names used by the audio-to-subtitles cells below.\n",
"input_dir = '/content/'\n",
"output_dir = '/content/'\n",
"audio_file = 'audio_file.wav'\n",
"# splitext drops only the final extension, so a name such as 'talk.v2.wav'\n",
"# keeps its full stem ('talk.v2') instead of being cut at the first dot.\n",
"audio_path = os.path.splitext(audio_file)[0]\n",
"subtitle = audio_path"
],
"metadata": {
"id": "HADqXLwrhgyR"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"result = translate(os.path.join(input_dir, audio_file), \n",
" model, \n",
" covert_to_english=True)"
],
"metadata": {
"id": "oLLGwZLVfyet"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"result['text']"
],
"metadata": {
"id": "TFSNFhpfhS93"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"write_subtitles(output_dir, subtitle, result)"
],
"metadata": {
"id": "Ug1SbxnPfii8"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Burn subtitles into a Video file"
],
"metadata": {
"id": "kdBVl6ZKj9aD"
}
},
{
"cell_type": "code",
"source": [
"video_file_name = 'demo.mp4'"
],
"metadata": {
"id": "ywV6hCjgnG7Q"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def video2mp3(video_file, output_ext=\"mp3\"):\n",
"    \"\"\"Convert ``video_file`` with ffmpeg into a sibling file ending in ``output_ext``.\n",
"\n",
"    Args:\n",
"        video_file: Path of the input media file.\n",
"        output_ext: Extension (without the dot) given to the output file.\n",
"\n",
"    Returns:\n",
"        The output path: ``video_file`` with its extension replaced by ``output_ext``.\n",
"\n",
"    Raises:\n",
"        RuntimeError: If ffmpeg exits with a non-zero status, so a failed\n",
"            conversion is reported here instead of as a missing file later.\n",
"    \"\"\"\n",
"    stem, _ = os.path.splitext(video_file)\n",
"    output_file = f\"{stem}.{output_ext}\"\n",
"    # -y overwrites an existing output; ffmpeg's log is captured rather than\n",
"    # printed so it can be attached to the error when the conversion fails.\n",
"    proc = subprocess.run([\"ffmpeg\", \"-y\", \"-i\", video_file, output_file],\n",
"                          stdout=subprocess.PIPE,\n",
"                          stderr=subprocess.STDOUT,\n",
"                          text=True,\n",
"                          errors=\"replace\")\n",
"    if proc.returncode != 0:\n",
"        log_tail = (proc.stdout or \"\").strip()[-2000:]\n",
"        raise RuntimeError(f\"ffmpeg failed (exit code {proc.returncode}) converting {video_file!r}: {log_tail}\")\n",
"    return output_file"
],
"metadata": {
"id": "_jWVsn5ZnDVJ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Extract the audio track; video2mp3 returns the path ffmpeg was asked to write.\n",
"audio_file = video2mp3(video_file_name)\n",
"# splitext drops only the final extension, so dotted names and paths such as\n",
"# './clips/demo.mp4' keep their stem instead of being cut at the first dot.\n",
"audio_path = os.path.splitext(audio_file)[0]\n",
"subtitle = audio_path\n",
"\n",
"# Use that path as-is: prefixing output_dir only resolved to the extracted file\n",
"# while the working directory happened to be output_dir.\n",
"result = translate(audio_file, \n",
"                   model, \n",
"                   covert_to_english=True)"
],
"metadata": {
"id": "PJbGTU1onOYG"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"print(result[\"text\"])"
],
"metadata": {
"id": "IV_13sadoDXm"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"write_subtitles(output_dir, subtitle, result)"
],
"metadata": {
"id": "SX2t-Y9OoIGz"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"output_video = audio_path + \"_subtitled.mp4\"\n",
"# The subtitles filter needs the .srt file itself, not the bare stem held in\n",
"# `subtitle`. The file is expected at <output_dir>/<stem>.srt, where the\n",
"# write_subtitles call above was told to put it.\n",
"subtitle_file = os.path.join(output_dir, os.path.basename(subtitle) + \".srt\")\n",
"\n",
"# An argument list avoids shell quoting problems with spaces in file names,\n",
"# -y overwrites an earlier output instead of prompting, and check=True raises\n",
"# if ffmpeg fails rather than leaving only an exit code to inspect.\n",
"completed = subprocess.run([\"ffmpeg\", \"-y\", \"-i\", video_file_name,\n",
"                            \"-vf\", f\"subtitles={subtitle_file}\",\n",
"                            output_video],\n",
"                           check=True)\n",
"completed.returncode"
],
"metadata": {
"id": "mFonkEjdoT_z"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "fFfvyH9EIr4B"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment