Transcribes audio using OpenAI's Whisper speech recognition model.

Make sure you have Docker installed.
```sh
./run.sh podcast_file.mp3 --model tiny --language English > transcript.txt
```
This will transcribe the podcast file `podcast_file.mp3` using the `tiny` model and the `English` language. The transcript will be saved in `transcript.txt`.
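For reference, a minimal sketch of what a wrapper like `run.sh` could look like, assuming a Docker image with the `whisper` CLI installed (the image name `whisper-image` is a placeholder, and the actual script may differ):

```sh
#!/usr/bin/env bash
# Hypothetical sketch: mount the current directory into the container and
# forward all arguments (audio file, --model, --language) to the whisper CLI.
set -euo pipefail

docker run --rm \
  -v "$PWD:/data" -w /data \
  whisper-image \
  whisper "$@"
```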
You can tail the transcript file (`tail -f transcript.txt`) in another terminal to see the text as it is being generated:
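```sh
tail -f transcript.txt
```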
Use one of the following models based on your requirements, as explained in the Whisper docs:
| Size   | Parameters | English-only model | Multilingual model | Required VRAM | Relative speed |
|--------|------------|--------------------|--------------------|---------------|----------------|
| tiny   | 39 M       | tiny.en            | tiny               | ~1 GB         | ~10x           |
| base   | 74 M       | base.en            | base               | ~1 GB         | ~7x            |
| small  | 244 M      | small.en           | small              | ~2 GB         | ~4x            |
| medium | 769 M      | medium.en          | medium             | ~5 GB         | ~2x            |
| large  | 1550 M     | N/A                | large              | ~10 GB        | 1x             |
| turbo  | 809 M      | N/A                | turbo              | ~6 GB         | ~8x            |
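For example, to trade speed for accuracy, run the same command with the `medium` model (note the higher VRAM requirement):

```sh
./run.sh podcast_file.mp3 --model medium --language English > transcript.txt
```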