Created February 14, 2023 23:18
Save malfet/e098ad49ecde484105b5efc7f50db644 to your computer and use it in GitHub Desktop.
Use openai-whisper on CPU vs MPS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python 3.10.8 (main, Nov 24 2022, 08:08:27) [Clang 14.0.6 ] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> import whisper
>>> torch.__version__
'2.0.0a0+git01de5dd'
>>> model = whisper.load_model("base")
>>> audio = whisper.load_audio("c1.mp3")  # downloaded from https://www.mobydickbigread.com/chapter-1-loomings/
>>> audio = whisper.pad_or_trim(audio)
>>> model.transcribe(audio)["text"]
/Users/malfet/miniconda3/lib/python3.10/site-packages/whisper/transcribe.py:78: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
' Mobey Dick or The Whale by Herman Melville. Chapter 1. Loomings. Call me Ishmael. Some years ago, never mind how long precisely, having little or no money in my purse and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world. So I have of driving off the spleen and regulating the circulation.'
>>> model.to("mps").transcribe(audio)["text"]
/Users/malfet/miniconda3/lib/python3.10/site-packages/whisper/decoding.py:633: UserWarning: MPS: no support for int64 repeats mask, casting it to int32 (Triggered internally at /Users/malfet/git/pytorch/pytorch/aten/src/ATen/native/mps/operations/Repeat.mm:236.)
  audio_features = audio_features.repeat_interleave(self.n_group, dim=0)
/Users/malfet/miniconda3/lib/python3.10/site-packages/whisper/decoding.py:439: UserWarning: MPS: no support for int64 min/max ops, casting it to int32 (Triggered internally at /Users/malfet/git/pytorch/pytorch/aten/src/ATen/native/mps/operations/ReduceOps.mm:1260.)
  max_text_token_logprob = logprobs[k, : self.tokenizer.timestamp_begin].max()
'<|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|la|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|><|en|>' |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.
Works now, using:
python -c "import torch;import whisper;model=whisper.load_model('base').to('mps');audio=whisper.load_audio('c1.mp3');print(model.transcribe(whisper.pad_or_trim(audio))['text'])"