Skip to content

Instantly share code, notes, and snippets.

@ncouture
Forked from sheikhfaisalanwar/audio-transriber.py
Created April 19, 2018 19:03
Show Gist options
  • Save ncouture/1e93524f0cd9a758dc653183735945fb to your computer and use it in GitHub Desktop.
Save ncouture/1e93524f0cd9a758dc653183735945fb to your computer and use it in GitHub Desktop.
import subprocess
import argparse
import base64
import json
"""
Currently uses Google's cloud speech API
"""
from googleapiclient import discovery
import httplib2
from oauth2client.client import GoogleCredentials
"""
You need the following
- brew install ffmpeg
- pip install --upgrade gcloud
- pip install --upgrade google-api-python-client
- env variables:
- export GCLOUD_PROJECT=test-174819 <-- project on my personal Google Cloud account, for testing
- export GOOGLE_APPLICATION_CREDENTIALS=test-c089fc77385e.json <--- Service key (ping me for it until we have an official one )
"""
# Discovery-document URL template handed to discovery.build() as
# discoveryServiceUrl; the {api} and {apiVersion} placeholders are left
# unexpanded here (presumably filled in by the client library -- confirm).
DISCOVERY_URL = (
    'https://{api}.googleapis.com/$discovery/rest'
    '?version={apiVersion}'
)
def extract_audio(video_file, speech_file):
    """Extract the audio track of a video into a separate audio file.

    Runs ``ffmpeg`` with video disabled (``-vn``), a single audio channel
    (``-ac 1``), a 44100 Hz sample rate (``-ar 44100``) and a 160k audio
    bitrate (``-ab 160k``).

    Args:
        video_file: path of the input video; converted with ``str()``.
        speech_file: path of the audio file ffmpeg should write; converted
            with ``str()``.

    Returns:
        The exit status of the ffmpeg process, as returned by
        ``subprocess.call``.
    """
    # Hand ffmpeg an argument list instead of a shell string: paths that
    # contain spaces or shell metacharacters are passed through verbatim
    # and can never be interpreted as shell syntax.
    command = [
        'ffmpeg',
        '-i', str(video_file),
        '-ab', '160k',
        '-ac', '1',
        '-ar', '44100',
        '-vn',
        str(speech_file),
    ]
    return subprocess.call(command)
def get_speech_service():
    """Build an authorized client for version 'v1' of the 'speech' API.

    Takes the application-default Google credentials, scopes them to
    'cloud-platform', authorizes a fresh ``httplib2.Http`` object with them
    and builds the service from DISCOVERY_URL over that object.

    Returns:
        The object returned by ``discovery.build``.
    """
    scopes = ['https://www.googleapis.com/auth/cloud-platform']
    default_credentials = GoogleCredentials.get_application_default()
    scoped_credentials = default_credentials.create_scoped(scopes)

    transport = httplib2.Http()
    scoped_credentials.authorize(transport)

    speech_service = discovery.build(
        'speech',
        'v1',
        http=transport,
        discoveryServiceUrl=DISCOVERY_URL,
    )
    return speech_service
def main(video_file, speech_file):
    """Transcribe the audio extracted from a video file.

    Extracts the audio of ``video_file`` into ``speech_file`` via
    ``extract_audio``, base64-encodes the resulting file, submits it to the
    speech service's ``recognize`` method and prints the response as JSON
    on standard output.

    Args:
        video_file: the name of the video stream to extract audio from.
        speech_file: the name of the audio file to write and then send.
    """
    extract_audio(video_file, speech_file)
    with open(speech_file, 'rb') as speech:
        speech_content = base64.b64encode(speech.read())

    service = get_speech_service()
    service_request = service.speech().recognize(
        body={
            'config': {
                'encoding': 'LINEAR16',  # 16-bit signed LE samples
                # extract_audio asks ffmpeg for 44100 Hz output
                # (-ar 44100); state the rate explicitly so the request
                # does not rely on the service inferring it from a header.
                'sampleRateHertz': 44100,
                'languageCode': 'en-US',  # a BCP-47 language tag
            },
            'audio': {
                # The JSON body needs text, so decode the base64 bytes.
                'content': speech_content.decode('UTF-8'),
            },
        })
    response = service_request.execute()
    print(json.dumps(response))
if __name__ == '__main__':
    # Command-line entry point: two positional paths, forwarded to main().
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in (
            ('video_file', 'Full path of video file to get audio from'),
            ('speech_file', 'Full path of audio file to be recognized'),
    ):
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()
    main(options.video_file, options.speech_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment