Skip to content

Instantly share code, notes, and snippets.

@khanof89
Created July 12, 2018 16:10
Show Gist options
  • Save khanof89/1c97f178dace3712991d114f95a3da2c to your computer and use it in GitHub Desktop.
Save khanof89/1c97f178dace3712991d114f95a3da2c to your computer and use it in GitHub Desktop.
import sys
import os
import speech_recognition as sr
from tqdm import tqdm
# Transcribe every audio file in a folder with Google Cloud Speech (through
# the speech_recognition package) and write a timestamped transcript.
#
# Usage: python3 slow.py folder_name

# Service-account key file; its contents are handed to the recognizer as-is.
CREDENTIALS_PATH = "/var/www/html/speech_to_text/api-key.json"
# Status file, overwritten on every run, in the current working directory.
LOG_PATH = "logs"
# Output file, written to the current working directory.
TRANSCRIPT_PATH = "transcript.txt"
# Timestamps assume every audio file covers this many seconds of speech.
CHUNK_SECONDS = 30


def write_log(message):
    """Replace the contents of the ``logs`` status file with *message*."""
    with open(LOG_PATH, "w") as log_file:
        log_file.write(message)


def read_folder_argument(argv):
    """Return the folder named by ``argv[1]`` with a trailing ``/`` appended.

    The outcome is recorded in the ``logs`` file.  When the argument is
    missing, the usage text is printed and the process exits with status 1.
    """
    try:
        folder_name = argv[1]
    except IndexError:
        print("Usage:\npython3 slow.py folder_name")
        write_log("caught exception")
        sys.exit(1)
    folder_name = folder_name + '/'
    write_log('got foldername ' + folder_name)
    print("foldername " + folder_name)
    return folder_name


def transcribe_folder(folder_name, credentials):
    """Transcribe each file in *folder_name*, in sorted name order.

    Args:
        folder_name: Directory to read; expected to end with ``/``.
        credentials: Value passed to ``recognize_google_cloud`` as
            ``credentials_json``.

    Returns:
        The recognized texts, one per successfully processed file.  The first
        failure stops processing; the texts gathered so far are still
        returned so that a partial transcript can be written.

    Raises:
        OSError: If *folder_name* cannot be listed.
    """
    recognizer = sr.Recognizer()
    file_names = sorted(os.listdir(folder_name))
    texts = []
    print(file_names)
    try:
        for file_name in tqdm(file_names):
            print('for f in tqdm ' + folder_name + file_name)
            # folder_name already ends with a separator, so this join yields
            # the same path as plain concatenation.
            path = os.path.join(folder_name, file_name)
            print('name ' + path)
            # Load audio file
            with sr.AudioFile(path) as source:
                print('inside source')
                audio = recognizer.record(source)
                print('done source')
            # Transcribe audio file.  The credentials are deliberately NOT
            # echoed to stdout: they are a secret service-account key.
            text = recognizer.recognize_google_cloud(
                audio, credentials_json=credentials)
            print('took credentials')
            texts.append(text)
    except Exception as e:
        # Top-level boundary: report the problem and keep what we have.
        print('exception in text=r.recognize ' + str(e))
    return texts


def format_transcript(texts, chunk_seconds=CHUNK_SECONDS):
    """Return *texts* as lines of the form ``HH:MM:SS text``.

    The i-th text is stamped with ``i * chunk_seconds`` seconds, i.e. the
    start offset of its chunk when all chunks have the same length.  An empty
    *texts* yields an empty string.
    """
    lines = []
    for index, text in enumerate(texts):
        # Split the offset into hours, minutes and seconds, see
        # https://stackoverflow.com/questions/775049/python-time-seconds-to-hms
        minutes, seconds = divmod(index * chunk_seconds, 60)
        hours, minutes = divmod(minutes, 60)
        lines.append(
            "{:0>2d}:{:0>2d}:{:0>2d} {}\n".format(hours, minutes, seconds, text))
    return "".join(lines)


def main():
    """Run the whole pipeline: read key, transcribe folder, save transcript."""
    with open(CREDENTIALS_PATH) as key_file:
        credentials = key_file.read()
    folder_name = read_folder_argument(sys.argv)
    transcript = format_transcript(transcribe_folder(folder_name, credentials))
    # The finished transcript is printed once, then saved.
    print(transcript)
    # Explicit UTF-8 so non-ASCII speech does not depend on the locale.
    with open(TRANSCRIPT_PATH, "w", encoding="utf-8") as transcript_file:
        transcript_file.write(transcript)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment