-
-
Save ncouture/1e93524f0cd9a758dc653183735945fb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess | |
import argparse | |
import base64 | |
import json | |
""" | |
Currently uses Google's cloud speech API | |
""" | |
from googleapiclient import discovery | |
import httplib2 | |
from oauth2client.client import GoogleCredentials | |
""" | |
You need the following | |
- brew install ffmpeg | |
- pip install --upgrade gcloud | |
- pip install --upgrade google-api-python-client | |
- env variables: | |
- export GCLOUD_PROJECT=test-174819 <-- project on my personal goodle cloud acc for testing | |
- export GOOGLE_APPLICATION_CREDENTIALS=test-c089fc77385e.json <--- Service key (ping me for it until we have an official one ) | |
""" | |
# URL template handed to discovery.build() as discoveryServiceUrl; the
# {api} and {apiVersion} placeholders are left unformatted here.
DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}'
def extract_audio(video_file, speech_file):
    """Extract the audio track of a video into a separate file with ffmpeg.

    The command is passed to ffmpeg as an argument list, without going
    through a shell, so paths containing spaces, quotes or shell
    metacharacters are handed over verbatim instead of being re-parsed
    (or executed) by the shell.

    Args:
        video_file: path of the video to read; converted with ``str()``.
        speech_file: path of the audio file to write; converted with ``str()``.

    Returns:
        The exit status reported by ffmpeg (0 on success).

    Raises:
        OSError: if the ``ffmpeg`` executable cannot be launched.
    """
    command = [
        'ffmpeg',
        '-i', str(video_file),
        '-ab', '160k',     # audio bitrate
        '-ac', '1',        # single (mono) channel
        '-ar', '44100',    # audio sample rate in Hz
        '-vn',             # drop the video stream
        str(speech_file),
    ]
    return subprocess.call(command)
def get_speech_service():
    """Build an authorized client for the 'speech' v1 API.

    Loads the application-default Google credentials, scopes them to
    ``cloud-platform``, authorizes a fresh ``httplib2.Http`` object with
    them, and passes that object to ``discovery.build``.

    Returns:
        Whatever ``discovery.build`` returns for ('speech', 'v1').
    """
    scopes = ['https://www.googleapis.com/auth/cloud-platform']
    default_credentials = GoogleCredentials.get_application_default()
    scoped_credentials = default_credentials.create_scoped(scopes)

    transport = httplib2.Http()
    scoped_credentials.authorize(transport)

    speech_service = discovery.build(
        'speech',
        'v1',
        http=transport,
        discoveryServiceUrl=DISCOVERY_URL,
    )
    return speech_service
def main(video_file, speech_file):
    """Extract audio from a video, send it for recognition, print the result.

    Args:
        video_file: the name of the video stream to extract audio from.
        speech_file: the name of the audio file; it is written by
            ``extract_audio`` and then read back here.
    """
    extract_audio(video_file, speech_file)

    with open(speech_file, 'rb') as audio_handle:
        encoded_audio = base64.b64encode(audio_handle.read())

    service = get_speech_service()

    # NOTE(review): no sampleRateHertz is sent with the LINEAR16 config;
    # presumably the API reads it from the audio header -- confirm.
    recognition_config = {
        'encoding': 'LINEAR16',  # raw 16-bit signed LE samples
        'languageCode': 'en-US',  # a BCP-47 language tag
    }
    # The JSON request body needs text, so the base64 bytes are decoded.
    audio_payload = {'content': encoded_audio.decode('UTF-8')}
    request_body = {'config': recognition_config, 'audio': audio_payload}

    recognize_request = service.speech().recognize(body=request_body)
    result = recognize_request.execute()
    print(json.dumps(result))
if __name__ == '__main__':
    # Two required positionals: the input video, then the audio file that
    # is written from it and sent for recognition.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        'video_file',
        help='Full path of video file to get audio from',
    )
    cli.add_argument(
        'speech_file',
        help='Full path of audio file to be recognized',
    )
    options = cli.parse_args()
    main(options.video_file, options.speech_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment