alonsoir · August 1, 2024 11:05
diff --git a/output whisper commands.txt b/output whisper commands.txt
 https://github.com/openai/whisper

 (whisper_env) ┌<▸> ~/g/openai-whisper-test
 └➤ ffmpeg -f avfoundation -list_devices true -i ""

 ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.3.9.4)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/7.0.1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox --enable-audiotoolbox
  libavutil      59.  8.100 / 59.  8.100
  libavcodec     61.  3.100 / 61.  3.100
  libavformat    61.  1.100 / 61.  1.100
  libavdevice    61.  1.100 / 61.  1.100
  libavfilter    10.  1.100 / 10.  1.100
  libswscale      8.  1.100 /  8.  1.100
  libswresample   5.  1.100 /  5.  1.100
  libpostproc    58.  1.100 / 58.  1.100
 2024-08-01 12:59:36.769 ffmpeg[6287:173121] WARNING: Add NSCameraUseContinuityCameraDeviceType to your Info.plist to use AVCaptureDeviceTypeContinuityCamera.
 [AVFoundation indev @ 0x7f9d32a04940] AVFoundation video devices:
 [AVFoundation indev @ 0x7f9d32a04940] [0] Cámara FaceTime HD (integrada)
 [AVFoundation indev @ 0x7f9d32a04940] [1] Capture screen 0
 [AVFoundation indev @ 0x7f9d32a04940] [2] Capture screen 1
 [AVFoundation indev @ 0x7f9d32a04940] AVFoundation audio devices:
 [AVFoundation indev @ 0x7f9d32a04940] [0] Microsoft Teams Audio
 [AVFoundation indev @ 0x7f9d32a04940] [1] Micrófono externo
 [AVFoundation indev @ 0x7f9d32a04940] [2] Micrófono del MacBook Pro
 [in#0 @ 0x7f9d32a04340] Error opening input: Input/output error
 Error opening input file .
 Error opening input files: Input/output error
 (whisper_env) ┌<▪> ~/g/openai-whisper-test
 └➤ ffmpeg -f avfoundation -i ":2" -c:a pcm_s32le -ar 96000 -ac 2 -b:a 3072k grabacion_alta_calidad.wav
 ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.3.9.4)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/7.0.1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox --enable-audiotoolbox
  libavutil      59.  8.100 / 59.  8.100
  libavcodec     61.  3.100 / 61.  3.100
  libavformat    61.  1.100 / 61.  1.100
  libavdevice    61.  1.100 / 61.  1.100
  libavfilter    10.  1.100 / 10.  1.100
  libswscale      8.  1.100 /  8.  1.100
  libswresample   5.  1.100 /  5.  1.100
  libpostproc    58.  1.100 / 58.  1.100
 2024-08-01 13:00:19.190 ffmpeg[6320:174001] WARNING: Add NSCameraUseContinuityCameraDeviceType to your Info.plist to use AVCaptureDeviceTypeContinuityCamera.
 Input #0, avfoundation, from ':2':
  Duration: N/A, start: 10095.394354, bitrate: 1536 kb/s
  Stream #0:0: Audio: pcm_f32le, 48000 Hz, mono, flt, 1536 kb/s
 Stream mapping:
  Stream #0:0 -> #0:0 (pcm_f32le (native) -> pcm_s32le (native))
 Press [q] to stop, [?] for help
 Output #0, wav, to 'grabacion_alta_calidad.wav':
  Metadata:
    ISFT            : Lavf61.1.100
  Stream #0:0: Audio: pcm_s32le ([1][0][0][0] / 0x0001), 96000 Hz, stereo, s32, 6144 kb/s
      Metadata:
        encoder         : Lavc61.3.100 pcm_s32le
 [out#0/wav @ 0x7fb7758053c0] video:0KiB audio:36632KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.000272%
 size=   36632KiB time=00:00:53.57 bitrate=5600.9kbits/s speed=   1x
 Exiting normally, received signal 2.
 (whisper_env) ┌<▪> ~/g/openai-whisper-test
 └➤ ls
 grabacion_alta_calidad.wav  output.srt	output.txt  output.wav			   party-crowd-daniel_simon.srt  party-crowd-daniel_simon.txt  party-crowd-daniel_simon.wav  pyproject.toml
 output.json		    output.tsv	output.vtt  party-crowd-daniel_simon.json  party-crowd-daniel_simon.tsv  party-crowd-daniel_simon.vtt  poetry.lock		     whisper_env
 (whisper_env) ┌<▸> ~/g/openai-whisper-test
 └➤ afplay grabacion_alta_calidad.wav
 ^C
 (whisper_env) ┌<▪> ~/g/openai-whisper-test
 └➤ whisper --task transcribe --verbose True --language Spanish --threads 4 grabacion_alta_calidad.wav
 /Users/aironman/git/openai-whisper-test/whisper_env/lib/python3.11/site-packages/whisper/transcribe.py:115: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
 [00:00.000 --> 00:10.000]  Hola, esto es una prueba. Vamos a grabar un audio de alta calidad.
 [00:10.000 --> 00:17.000]  Hola, me la malo el sol. Sí, no, se acabó.
 [00:17.000 --> 00:22.000]  El lado, el lado.
 [00:22.000 --> 00:25.000]  Vamos a grabar.
 [00:25.000 --> 00:28.000]  Cuando te doy un poco eso.
 [00:28.000 --> 00:32.000]  Está batellada.
 [00:32.000 --> 00:35.000]  Está batellada.
 [00:35.000 --> 00:48.000]  Está batellada.
 (whisper_env) ┌<▸> ~/g/openai-whisper-test
 └➤
	https://github.com/openai/whisper

	(whisper_env) ┌<▸> ~/g/openai-whisper-test
	└➤ ffmpeg -f avfoundation -list_devices true -i ""

	ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
	built with Apple clang version 15.0.0 (clang-1500.3.9.4)
	configuration: --prefix=/usr/local/Cellar/ffmpeg/7.0.1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox --enable-audiotoolbox
	libavutil      59.  8.100 / 59.  8.100
	libavcodec     61.  3.100 / 61.  3.100
	libavformat    61.  1.100 / 61.  1.100
	libavdevice    61.  1.100 / 61.  1.100
	libavfilter    10.  1.100 / 10.  1.100
	libswscale      8.  1.100 /  8.  1.100
	libswresample   5.  1.100 /  5.  1.100
	libpostproc    58.  1.100 / 58.  1.100
	2024-08-01 12:59:36.769 ffmpeg[6287:173121] WARNING: Add NSCameraUseContinuityCameraDeviceType to your Info.plist to use AVCaptureDeviceTypeContinuityCamera.
	[AVFoundation indev @ 0x7f9d32a04940] AVFoundation video devices:
	[AVFoundation indev @ 0x7f9d32a04940] [0] Cámara FaceTime HD (integrada)
	[AVFoundation indev @ 0x7f9d32a04940] [1] Capture screen 0
	[AVFoundation indev @ 0x7f9d32a04940] [2] Capture screen 1
	[AVFoundation indev @ 0x7f9d32a04940] AVFoundation audio devices:
	[AVFoundation indev @ 0x7f9d32a04940] [0] Microsoft Teams Audio
	[AVFoundation indev @ 0x7f9d32a04940] [1] Micrófono externo
	[AVFoundation indev @ 0x7f9d32a04940] [2] Micrófono del MacBook Pro
	[in#0 @ 0x7f9d32a04340] Error opening input: Input/output error
	Error opening input file .
	Error opening input files: Input/output error
	(whisper_env) ┌<▪> ~/g/openai-whisper-test
	└➤ ffmpeg -f avfoundation -i ":2" -c:a pcm_s32le -ar 96000 -ac 2 -b:a 3072k grabacion_alta_calidad.wav
	ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
	built with Apple clang version 15.0.0 (clang-1500.3.9.4)
	configuration: --prefix=/usr/local/Cellar/ffmpeg/7.0.1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox --enable-audiotoolbox
	libavutil      59.  8.100 / 59.  8.100
	libavcodec     61.  3.100 / 61.  3.100
	libavformat    61.  1.100 / 61.  1.100
	libavdevice    61.  1.100 / 61.  1.100
	libavfilter    10.  1.100 / 10.  1.100
	libswscale      8.  1.100 /  8.  1.100
	libswresample   5.  1.100 /  5.  1.100
	libpostproc    58.  1.100 / 58.  1.100
	2024-08-01 13:00:19.190 ffmpeg[6320:174001] WARNING: Add NSCameraUseContinuityCameraDeviceType to your Info.plist to use AVCaptureDeviceTypeContinuityCamera.
	Input #0, avfoundation, from ':2':
	Duration: N/A, start: 10095.394354, bitrate: 1536 kb/s
	Stream #0:0: Audio: pcm_f32le, 48000 Hz, mono, flt, 1536 kb/s
	Stream mapping:
	Stream #0:0 -> #0:0 (pcm_f32le (native) -> pcm_s32le (native))
	Press [q] to stop, [?] for help
	Output #0, wav, to 'grabacion_alta_calidad.wav':
	Metadata:
	ISFT : Lavf61.1.100
	Stream #0:0: Audio: pcm_s32le ([1][0][0][0] / 0x0001), 96000 Hz, stereo, s32, 6144 kb/s
	Metadata:
	encoder : Lavc61.3.100 pcm_s32le
	[out#0/wav @ 0x7fb7758053c0] video:0KiB audio:36632KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.000272%
	size= 36632KiB time=00:00:53.57 bitrate=5600.9kbits/s speed= 1x
	Exiting normally, received signal 2.
	(whisper_env) ┌<▪> ~/g/openai-whisper-test
	└➤ ls
	grabacion_alta_calidad.wav output.srt output.txt output.wav party-crowd-daniel_simon.srt party-crowd-daniel_simon.txt party-crowd-daniel_simon.wav pyproject.toml
	output.json output.tsv output.vtt party-crowd-daniel_simon.json party-crowd-daniel_simon.tsv party-crowd-daniel_simon.vtt poetry.lock whisper_env
	(whisper_env) ┌<▸> ~/g/openai-whisper-test
	└➤ afplay grabacion_alta_calidad.wav
	^C
	(whisper_env) ┌<▪> ~/g/openai-whisper-test
	└➤ whisper --task transcribe --verbose True --language Spanish --threads 4 grabacion_alta_calidad.wav
	/Users/aironman/git/openai-whisper-test/whisper_env/lib/python3.11/site-packages/whisper/transcribe.py:115: UserWarning: FP16 is not supported on CPU; using FP32 instead
	warnings.warn("FP16 is not supported on CPU; using FP32 instead")
	[00:00.000 --> 00:10.000] Hola, esto es una prueba. Vamos a grabar un audio de alta calidad.
	[00:10.000 --> 00:17.000] Hola, me la malo el sol. Sí, no, se acabó.
	[00:17.000 --> 00:22.000] El lado, el lado.
	[00:22.000 --> 00:25.000] Vamos a grabar.
	[00:25.000 --> 00:28.000] Cuando te doy un poco eso.
	[00:28.000 --> 00:32.000] Está batellada.
	[00:32.000 --> 00:35.000] Está batellada.
	[00:35.000 --> 00:48.000] Está batellada.
	(whisper_env) ┌<▸> ~/g/openai-whisper-test
	└➤