ncouture · April 19, 2018 19:03
diff --git a/audio-transriber.py b/audio-transriber.py

 import subprocess
 import argparse
 import base64
 import json

 """
 Currently uses Google's cloud speech API
 """
 from googleapiclient import discovery
 import httplib2
 from oauth2client.client import GoogleCredentials


 """
 You need the following 
    - brew install ffmpeg
    - pip install --upgrade gcloud
    - pip install --upgrade google-api-python-client
    - env variables: 
        - export GCLOUD_PROJECT=test-174819 <-- project on my personal Google Cloud account for testing
        - export GOOGLE_APPLICATION_CREDENTIALS=test-c089fc77385e.json <--- Service key (ping me for it until we have an official one )

 """
 # URL template for the API discovery document. The {api} and {apiVersion}
 # placeholders are left unformatted here; the template is handed to
 # discovery.build() as discoveryServiceUrl.
 DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}'

 def extract_audio(video_file, speech_file):
    """Extract the audio track of a video into a separate file using ffmpeg.

    Runs ``ffmpeg -i <video_file> -ab 160k -ac 1 -ar 44100 -vn <speech_file>``.
    The command is passed as an argument list instead of a shell string, so
    paths containing spaces or shell metacharacters reach ffmpeg verbatim
    rather than being split or interpreted by a shell.

    Args:
        video_file: path of the video to read the audio from.
        speech_file: path of the audio file ffmpeg should write.

    Raises:
        OSError: if the ffmpeg executable cannot be started. (With the old
            shell-based call this only surfaced as an ignored exit status.)
    """
    command = [
        'ffmpeg',
        '-i', str(video_file),
        '-ab', '160k',    # audio bitrate
        '-ac', '1',       # single (mono) audio channel
        '-ar', '44100',   # audio sampling rate in Hz
        '-vn',            # drop the video stream
        str(speech_file),
    ]
    # As before, ffmpeg's exit status is not inspected here.
    subprocess.call(command)


 def get_speech_service():
    """Build a client for version 'v1' of the 'speech' API.

    Obtains the application-default credentials, scopes them to
    ``cloud-platform``, authorizes a fresh ``httplib2.Http`` object with
    them, and hands that object to ``discovery.build``.

    Returns:
        Whatever ``discovery.build`` returns for the 'speech' / 'v1' API.
    """
    scopes = ['https://www.googleapis.com/auth/cloud-platform']
    default_credentials = GoogleCredentials.get_application_default()
    scoped_credentials = default_credentials.create_scoped(scopes)

    authorized_http = httplib2.Http()
    scoped_credentials.authorize(authorized_http)

    speech_service = discovery.build(
        'speech',
        'v1',
        http=authorized_http,
        discoveryServiceUrl=DISCOVERY_URL,
    )
    return speech_service


 def main(video_file, speech_file):
    """Extract audio from a video, submit it for recognition, print the reply.

    The audio is written to ``speech_file`` by ``extract_audio``, read back,
    base64-encoded and sent through the speech service's ``recognize`` call.
    The response is printed to stdout as JSON.

    Args:
        video_file: the name of the video stream to extract audio from.
        speech_file: the name of the audio file to create and transcribe.
    """
    extract_audio(video_file, speech_file)

    with open(speech_file, 'rb') as audio_handle:
        raw_audio = audio_handle.read()
    # The request body carries the audio as base64 text, not raw bytes.
    encoded_audio = base64.b64encode(raw_audio).decode('UTF-8')

    recognition_config = {
        'encoding': 'LINEAR16',  # raw 16-bit signed LE samples
        'languageCode': 'en-US',  # a BCP-47 language tag
    }
    request_body = {
        'config': recognition_config,
        'audio': {'content': encoded_audio},
    }

    speech_service = get_speech_service()
    recognize_request = speech_service.speech().recognize(body=request_body)
    reply = recognize_request.execute()
    print(json.dumps(reply))

 if __name__ == '__main__':
    # Command-line entry point: two positional paths, forwarded to main().
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in (
            ('video_file', 'Full path of video file to get audio from'),
            ('speech_file', 'Full path of audio file to be recognized'),
    ):
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()
    main(options.video_file, options.speech_file)

	import subprocess
	import argparse
	import base64
	import json

	"""
	Currently uses Google's cloud speech API
	"""
	from googleapiclient import discovery
	import httplib2
	from oauth2client.client import GoogleCredentials


	"""
	You need the following
	- brew install ffmpeg
	- pip install --upgrade gcloud
	- pip install --upgrade google-api-python-client
	- env variables:
	- export GCLOUD_PROJECT=test-174819 <-- project on my personal Google Cloud account for testing
	- export GOOGLE_APPLICATION_CREDENTIALS=test-c089fc77385e.json <--- Service key (ping me for it until we have an official one )

	"""
	# URL template for the API discovery document. The {api} and {apiVersion}
	# placeholders are left unformatted here; the template is handed to
	# discovery.build() as discoveryServiceUrl.
	DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}'

	def extract_audio(video_file, speech_file):
		"""Extract the audio track of a video into a separate file using ffmpeg.

		Runs ``ffmpeg -i <video_file> -ab 160k -ac 1 -ar 44100 -vn <speech_file>``.
		The command is passed as an argument list instead of a shell string, so
		paths containing spaces or shell metacharacters reach ffmpeg verbatim
		rather than being split or interpreted by a shell.

		Args:
			video_file: path of the video to read the audio from.
			speech_file: path of the audio file ffmpeg should write.

		Raises:
			OSError: if the ffmpeg executable cannot be started. (With the old
				shell-based call this only surfaced as an ignored exit status.)
		"""
		command = [
			'ffmpeg',
			'-i', str(video_file),
			'-ab', '160k',    # audio bitrate
			'-ac', '1',       # single (mono) audio channel
			'-ar', '44100',   # audio sampling rate in Hz
			'-vn',            # drop the video stream
			str(speech_file),
		]
		# As before, ffmpeg's exit status is not inspected here.
		subprocess.call(command)


	def get_speech_service():
		"""Build a client for version 'v1' of the 'speech' API.

		Obtains the application-default credentials, scopes them to
		``cloud-platform``, authorizes a fresh ``httplib2.Http`` object with
		them, and hands that object to ``discovery.build``.

		Returns:
			Whatever ``discovery.build`` returns for the 'speech' / 'v1' API.
		"""
		scopes = ['https://www.googleapis.com/auth/cloud-platform']
		default_credentials = GoogleCredentials.get_application_default()
		scoped_credentials = default_credentials.create_scoped(scopes)

		authorized_http = httplib2.Http()
		scoped_credentials.authorize(authorized_http)

		speech_service = discovery.build(
			'speech',
			'v1',
			http=authorized_http,
			discoveryServiceUrl=DISCOVERY_URL,
		)
		return speech_service


	def main(video_file, speech_file):
		"""Extract audio from a video, submit it for recognition, print the reply.

		The audio is written to ``speech_file`` by ``extract_audio``, read back,
		base64-encoded and sent through the speech service's ``recognize`` call.
		The response is printed to stdout as JSON.

		Args:
			video_file: the name of the video stream to extract audio from.
			speech_file: the name of the audio file to create and transcribe.
		"""
		extract_audio(video_file, speech_file)

		with open(speech_file, 'rb') as audio_handle:
			raw_audio = audio_handle.read()
		# The request body carries the audio as base64 text, not raw bytes.
		encoded_audio = base64.b64encode(raw_audio).decode('UTF-8')

		recognition_config = {
			'encoding': 'LINEAR16',  # raw 16-bit signed LE samples
			'languageCode': 'en-US',  # a BCP-47 language tag
		}
		request_body = {
			'config': recognition_config,
			'audio': {'content': encoded_audio},
		}

		speech_service = get_speech_service()
		recognize_request = speech_service.speech().recognize(body=request_body)
		reply = recognize_request.execute()
		print(json.dumps(reply))

	if __name__ == '__main__':
		# Command-line entry point: two positional paths, forwarded to main().
		cli = argparse.ArgumentParser()
		for arg_name, arg_help in (
				('video_file', 'Full path of video file to get audio from'),
				('speech_file', 'Full path of audio file to be recognized'),
		):
			cli.add_argument(arg_name, help=arg_help)
		options = cli.parse_args()
		main(options.video_file, options.speech_file)