Created
April 25, 2021 11:15
-
-
Save hirocarma/97319b0b5ef24dc955557bf450a53f5e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import os | |
| import sys | |
| import numpy as np | |
| import wave | |
| import struct | |
| import math | |
| import speech_recognition as sr | |
def wav2txt(wav_fname):
    """Transcribe a WAV file to Japanese text with the Google Web Speech API.

    Args:
        wav_fname: Path of the audio file handed to ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when recognition fails (a message
        describing the failure is printed in that case).
    """
    recognizer = sr.Recognizer()

    # Load the complete file into memory as one audio clip.
    with sr.AudioFile(wav_fname) as audio_source:
        clip = recognizer.record(audio_source)

    try:
        return recognizer.recognize_google(clip, language="ja-JP")
    except sr.UnknownValueError:
        print("Google Web Speech API could not recognize speech.")
    except sr.RequestError as err:
        print("Could not request Google Web Speech API. {0}".format(err))
    return None
def cut_wav(wav_fname, time, out_dir):
    """Split a WAV file into fixed-length chunks and transcribe each chunk.

    Chunk ``i`` is written to ``<out_dir>/<i>.wav`` and passed to
    ``wav2txt``. When a transcription is returned it is appended to
    ``<out_dir>/dialogue.txt`` under a ``#-- <offset>`` header line, with
    spaces in the text replaced by newlines.

    Args:
        wav_fname: Path of the source WAV file.
        time: Chunk length in seconds (converted with ``int``).
        out_dir: Existing directory that receives the chunk files and
            ``dialogue.txt``.

    Raises:
        ValueError: If ``time`` is less than one second.
    """
    chunk_seconds = int(time)
    if chunk_seconds <= 0:
        raise ValueError(
            "time must be at least 1 second, got {0!r}".format(time))

    out_txtf = os.path.join(out_dir, 'dialogue.txt')

    # The context manager closes the input even if a chunk fails midway.
    with wave.open(wav_fname, 'rb') as wr:
        ch = wr.getnchannels()
        width = wr.getsampwidth()
        fr = wr.getframerate()
        fn = wr.getnframes()

        total_time = 1.0 * fn / fr
        frames_per_chunk = fr * chunk_seconds
        samples_per_chunk = ch * frames_per_chunk
        # Round up so the trailing audio shorter than one full chunk is
        # transcribed too instead of being silently dropped.
        num_cut = (fn + frames_per_chunk - 1) // frames_per_chunk

        print("Frame Rate: ", fr)
        print("Frame num: ", fn)
        print("Total time: ", total_time)
        print("Total time(integer)", math.floor(total_time))
        print("Time: ", chunk_seconds)
        print("Frames: ", samples_per_chunk)
        print("Number of cut: ", num_cut)

        for i in range(num_cut):
            # Copy raw frame bytes straight through: this works for any
            # sample width and avoids unpacking/repacking every sample.
            chunk = wr.readframes(frames_per_chunk)
            if not chunk:
                # The header reported more frames than the file contains.
                break

            start_cut = i * samples_per_chunk
            end_cut = start_cut + len(chunk) // width
            print(start_cut)
            print(end_cut)

            out_wavf = os.path.join(out_dir, str(i) + '.wav')
            with wave.open(out_wavf, 'wb') as ww:
                ww.setnchannels(ch)
                ww.setsampwidth(width)
                ww.setframerate(fr)
                ww.writeframes(chunk)

            text = wav2txt(out_wavf)
            if text is None:
                continue

            # Label the chunk by its start offset. For 60-second chunks this
            # is the historical "<i>min" header; other lengths get a label
            # that matches the real offset.
            if chunk_seconds % 60 == 0:
                label = str(i * (chunk_seconds // 60)) + 'min'
            else:
                label = str(i * chunk_seconds) + 'sec'

            text = text.replace(' ', '\n')
            # UTF-8 keeps Japanese text writable whatever the platform's
            # default encoding is.
            with open(out_txtf, 'a', encoding='utf-8') as f:
                f.write('#-- ' + label + '\n' + text + '\n')
if __name__ == '__main__':
    # Usage: script.py WAV_FILE
    # Chunks and the transcript go into a directory in the current working
    # directory, named after the input file without its extension.
    if len(sys.argv) < 2:
        sys.exit("usage: {0} WAV_FILE".format(sys.argv[0]))

    wav_fname = sys.argv[1]
    out_dir = os.path.splitext(os.path.basename(wav_fname))[0]
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(out_dir, exist_ok=True)
    cut_wav(wav_fname, 60, out_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment