sebington · December 27, 2024 14:58
diff --git a/batch_whisper.ipynb b/batch_whisper.ipynb
 {
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "UzYPccxr87Fc"
      },
      "outputs": [],
      "source": [
        "! pip install -U openai-whisper -q"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "JaKbVbbjFc-C"
      },
      "outputs": [],
      "source": [
        "import whisper"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Kr5faKybKi4p"
      },
      "outputs": [],
      "source": [
        "# choose a model: tiny (74M), base (141M), small (472M), medium (1.5G), large-v1, large-v2 or large-v3 (3.0G)\n",
        "model = whisper.load_model(\"large-v3\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "BdjHE5fHhFG9"
      },
      "outputs": [],
      "source": [
        "audio = \"file.mp3\"\n",
        "result = model.transcribe(audio) # optionally pass a language code, e.g. language='fr'"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "oeiLLfFLgByE"
      },
      "outputs": [],
      "source": [
        "print(result[\"text\"])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "0CiwUbC8ybJx"
      },
      "outputs": [],
      "source": [
        "print(result) # dictionary"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "zj3YEpZ3g476"
      },
      "outputs": [],
      "source": [
        "result['segments']"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "iLpPls3TH-L-"
      },
      "outputs": [],
      "source": [
        "# write results to file\n",
        "with open('result.txt', 'w') as f:\n",
        "  f.write(result[\"text\"])"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "mbfB1TuSxjuE"
      },
      "source": [
        "#### Generate SRT"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "aJzuineWYI3K"
      },
      "outputs": [],
      "source": [
        "from whisper.utils import get_writer\n",
        "\n",
        "output_directory = \"./\"\n",
        "\n",
        "# Save as an SRT file\n",
        "srt_writer = get_writer(\"srt\", output_directory)\n",
        "srt_writer(result, audio)\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "sKftmQ-Dv1zs"
      },
      "source": [
        "#### Batch transcription"
      ]
    },
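    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "fuSQ2y93DO9X"
      },
      "outputs": [],
      "source": [
        "# batch transcribe\n",
        "# cells 1-3 MUST be run first\n",
        "# files MUST end with a 2-letter language code before the extension, e.g. talk_en.mp3 or talk_fr.mp3\n",
        "# (only .mp3 files are picked up by the filter below)\n",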
        "\n",
        "from whisper.utils import get_writer\n",
        "import os\n",
        "\n",
        "# Get a list of all files in the current directory\n",
        "files_in_directory = os.listdir()\n",
        "\n",
        "# Filter the list to include only files with a specific extension (e.g., mp3)\n",
        "audio_files = [file for file in files_in_directory if file.endswith(\".mp3\")]\n",
        "\n",
        "# Iterate through each audio file and transcribe\n",
        "for audio_file in audio_files:\n",
        "    language = audio_file[-6:-4] # slice end of filename to get language code\n",
        "    result = model.transcribe(audio_file, language=language)\n",
        "    srt_writer = get_writer(\"srt\", output_dir=\"\")\n",
        "    srt_writer(result, audio_file)\n",
        "\n",
        "\n",
        "# Indicates end of process\n",
        "print(\"Transcription process completed.\")"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.13.1"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "UzYPccxr87Fc"
	},
	"outputs": [],
	"source": [
	"! pip install -U openai-whisper -q"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "JaKbVbbjFc-C"
	},
	"outputs": [],
	"source": [
	"import whisper"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "Kr5faKybKi4p"
	},
	"outputs": [],
	"source": [
	"# choose a model: tiny (74M), base (141M), small (472M), medium (1.5G), large-v1, large-v2 or large-v3 (3.0G)\n",
	"model = whisper.load_model(\"large-v3\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "BdjHE5fHhFG9"
	},
	"outputs": [],
	"source": [
	"audio = \"file.mp3\"\n",
	"result = model.transcribe(audio) # optionally pass a language code, e.g. language='fr'"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "oeiLLfFLgByE"
	},
	"outputs": [],
	"source": [
	"print(result[\"text\"])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "0CiwUbC8ybJx"
	},
	"outputs": [],
	"source": [
	"print(result) # dictionary"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "zj3YEpZ3g476"
	},
	"outputs": [],
	"source": [
	"result['segments']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "iLpPls3TH-L-"
	},
	"outputs": [],
	"source": [
	"# write results to file\n",
	"with open('result.txt', 'w') as f:\n",
	" f.write(result[\"text\"])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "mbfB1TuSxjuE"
	},
	"source": [
	"#### Generate SRT"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "aJzuineWYI3K"
	},
	"outputs": [],
	"source": [
	"from whisper.utils import get_writer\n",
	"\n",
	"output_directory = \"./\"\n",
	"\n",
	"# Save as an SRT file\n",
	"srt_writer = get_writer(\"srt\", output_directory)\n",
	"srt_writer(result, audio)\n"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "sKftmQ-Dv1zs"
	},
	"source": [
	"#### Batch transcription"
	]
	},
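	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "fuSQ2y93DO9X"
	},
	"outputs": [],
	"source": [
	"# batch transcribe\n",
	"# cells 1-3 MUST be run first\n",
	"# files MUST end with a 2-letter language code before the extension, e.g. talk_en.mp3 or talk_fr.mp3\n",
	"# (only .mp3 files are picked up by the filter below)\n",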
	"\n",
	"from whisper.utils import get_writer\n",
	"import os\n",
	"\n",
	"# Get a list of all files in the current directory\n",
	"files_in_directory = os.listdir()\n",
	"\n",
	"# Filter the list to include only files with a specific extension (e.g., mp3)\n",
	"audio_files = [file for file in files_in_directory if file.endswith(\".mp3\")]\n",
	"\n",
	"# Iterate through each audio file and transcribe\n",
	"for audio_file in audio_files:\n",
	" language = audio_file[-6:-4] # slice end of filename to get language code\n",
	" result = model.transcribe(audio_file, language=language)\n",
	" srt_writer = get_writer(\"srt\", output_dir=\"\")\n",
	" srt_writer(result, audio_file)\n",
	"\n",
	"\n",
	"# Indicates end of process\n",
	"print(\"Transcription process completed.\")"
	]
	}
	],
	"metadata": {
	"accelerator": "GPU",
	"colab": {
	"gpuType": "T4",
	"provenance": []
	},
	"kernelspec": {
	"display_name": "Python 3",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.13.1"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}
No results found