Created
October 14, 2022 07:57
-
-
Save ahmadrosid/aa44bab5518cc3e3408b7b5fe79dc50a to your computer and use it in GitHub Desktop.
Transcribe a YouTube video using Vosk.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| import yt_dlp | |
def download_wav(url, ffmpeg_location='/opt/homebrew/bin/ffmpeg'):
    """Download the audio of a video with yt-dlp and convert it to WAV.

    The video title is looked up first (without downloading anything),
    stripped of characters that are not allowed in file names, and used as
    the name of the output file in the current working directory.

    Args:
        url: Address of the video to download.
        ffmpeg_location: Path of the ffmpeg binary (or its directory). It is
            handed to yt-dlp only when it exists on disk; otherwise the
            option is omitted so yt-dlp can look for ffmpeg itself.

    Returns:
        The name of the resulting file, ``"<sanitized title>.wav"``.

    Raises:
        RuntimeError: If yt-dlp reports a non-zero download status.
    """
    import os  # local import: `os` is not imported at the top of this module

    # First pass: fetch metadata only, to learn the title.
    with yt_dlp.YoutubeDL({}) as ydl:
        info_dict = ydl.extract_info(url, download=False)

    # Drop characters that are invalid in file names on common platforms.
    video_name = re.sub(r'[\\/*?:"<>|]', '', info_dict['title'])

    ydl_opts = {
        'format': 'm4a/bestaudio/best',
        'noplaylist': True,
        # YoutubeDL parameter names are 'continuedl' and 'geo_bypass'; the
        # previous spellings were not recognised and therefore ignored.
        'continuedl': True,
        'geo_bypass': True,
        # '%' is doubled so a title containing it is not parsed as an output
        # template field. '%(ext)s' lets the downloaded file keep its real
        # extension until the post-processor writes the .wav file.
        'outtmpl': './' + video_name.replace('%', '%%') + '.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }
    # Only force an ffmpeg location that actually exists on this machine.
    if ffmpeg_location and os.path.exists(ffmpeg_location):
        ydl_opts['ffmpeg_location'] = ffmpeg_location

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # download() is documented to take a list of URLs.
        error_code = ydl.download([url])
    if error_code:
        raise RuntimeError(
            "yt-dlp download failed with status {}".format(error_code)
        )

    return "{}.wav".format(video_name)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import json | |
| import wave | |
| from vosk import Model, KaldiRecognizer | |
| from pydub import AudioSegment | |
| from tqdm import tqdm | |
| from download import download_wav | |
def saveListToFile(values, file_path):
    """Write every item of ``values`` to ``file_path``, one item per line.

    Each item is formatted with ``str.format`` and followed by a newline.
    An existing file is overwritten; an empty ``values`` produces an empty
    file.

    Args:
        values: Iterable of items to write.
        file_path: Path of the text file to create or overwrite.
    """
    # The context manager closes the file even if formatting or writing
    # raises; UTF-8 keeps the output independent of the platform default.
    with open(file_path, "w", encoding="utf-8") as out_file:
        out_file.writelines("{}\n".format(val) for val in values)
CHANNELS = 1  # The audio is down-mixed to this many channels before recognition.


def voice_recognition(filename, step_ms=30 * 1000,
                      model_name="vosk-model-en-us-0.22"):
    """Transcribe a WAV file with Vosk, one fixed-length window at a time.

    The audio is converted to ``CHANNELS`` channel(s), cut into consecutive
    windows of ``step_ms`` milliseconds, and each window is passed to the
    recognizer. The recognizer's result is read after every window, so the
    transcript has exactly one entry per window (possibly an empty string).

    Args:
        filename: Path of the WAV file to transcribe.
        step_ms: Window length in milliseconds (default: 30 seconds).
        model_name: Name of the Vosk model to load.

    Returns:
        A list with the recognized text of each window, in order.

    Raises:
        ValueError: If ``step_ms`` is not positive.
    """
    if step_ms <= 0:
        raise ValueError("step_ms must be a positive number of milliseconds")

    model = Model(model_name=model_name)

    # The wave module is only needed for the sample rate; close the file
    # right away instead of leaving the handle open.
    with wave.open(filename, "rb") as wf:
        frame_rate = wf.getframerate()

    rec = KaldiRecognizer(model, frame_rate)
    rec.SetWords(True)  # word-level details are requested; only "text" is used below

    wav_audio = AudioSegment.from_wav(filename)
    wav_audio = wav_audio.set_channels(CHANNELS)
    wav_audio = wav_audio.set_frame_rate(frame_rate)
    # raw_data is fed to the recognizer as-is; Vosk's examples expect 16-bit
    # PCM, so normalise the sample width (a no-op for 16-bit input).
    wav_audio = wav_audio.set_sample_width(2)

    transcript = []
    # Slicing an AudioSegment is done in milliseconds.
    for start in tqdm(range(0, len(wav_audio), step_ms)):
        segment = wav_audio[start:start + step_ms]
        rec.AcceptWaveform(segment.raw_data)
        transcript.append(json.loads(rec.Result())["text"])
    return transcript
def main():
    """Download one video, transcribe it, save the text, and delete the WAV.

    The transcript is written next to the audio file, under the same name
    with a ``.txt`` extension.
    """
    url = "https://www.youtube.com/shorts/03tVbko1RaY"
    file_name = download_wav(url)
    result = voice_recognition(filename=file_name)
    # Swap only the extension: str.replace("wav", "txt") would also rewrite
    # any "wav" inside the title itself (e.g. "Microwave").
    saveListToFile(result, os.path.splitext(file_name)[0] + ".txt")
    os.remove(file_name)


# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment