shotasenga · June 22, 2019 18:49 · shotasenga · Jun 22, 2019
diff --git a/mp3_to_text.py b/mp3_to_text.py
 import os
 import re
 import wave
 import tempfile
 from pydub import AudioSegment
 from google.cloud import speech
 from google.cloud.speech import enums
 from google.cloud.speech import types

 # Temporary directory whose path mp3_to_wav() uses as the prefix for the
 # wav files it creates. It is created as soon as this module is imported;
 # nothing visible in this module deletes it afterwards.
 WORK_DIR = tempfile.mkdtemp()

 def mp3_to_text(fpath):
    """
    Transcribe an mp3 file with the GCP Speech-to-Text API

    The mp3 is converted to a wav file by mp3_to_wav() and that wav file
    is then handed to wav_to_text().

    Params:
        (str) fpath: path to the mp3 file
    Returns:
        (str[]) recognized texts
    """
    return wav_to_text(mp3_to_wav(fpath))


 def wav_to_text(fpath):
    """
    Transcribe a wav file with the GCP Speech-to-Text API

    The whole file is read into memory and sent in a single recognize()
    request, declared as LINEAR16 audio in US English with the sample
    rate taken from the file itself.

    Params:
        (str) fpath: path to the wav file
    Returns:
        (str[]) transcript of the first alternative of every result
    """
    sample_rate = get_frame_rate(fpath)

    client = speech.SpeechClient()
    with open(fpath, 'rb') as wav_file:
        audio = types.RecognitionAudio(content=wav_file.read())

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate,
        language_code='en-US'
    )

    response = client.recognize(config, audio)

    # Only the first alternative of each result is kept.
    return [result.alternatives[0].transcript for result in response.results]


 def get_frame_rate(fpath):
    """
    Return the sampling frequency the wave module reports for a wav file

    Params:
        (str) fpath: path to a wav file
    Returns:
        (int) the frame rate
    """
    with wave.open(fpath, 'rb') as wav_reader:
        return wav_reader.getframerate()


 def mp3_to_wav(fpath):
    """
    Convert an mp3 file to a mono wav file created inside WORK_DIR

    Params:
        (str) fpath: path to the mp3 file
    Returns:
        (str) Created wav file path
    """
    # Replace the extension whatever its case ("a.MP3" -> "a.wav") so the
    # exported file always carries a .wav name.
    stem, _ = os.path.splitext(os.path.basename(fpath))
    # Join with a path separator: mkdtemp() returns a path without a
    # trailing slash, so plain string concatenation would create the file
    # next to WORK_DIR instead of inside it.
    new_fpath = os.path.join(WORK_DIR, stem + ".wav")

    sound = AudioSegment.from_mp3(fpath)
    # Down-mix to a single channel before exporting.
    sound = sound.set_channels(1)
    sound.export(new_fpath, format="wav", parameters=["-ac", "1"])
    return new_fpath


 if __name__ == "__main__":
    import sys

    # Transcribe the mp3 named by the first command-line argument; with no
    # argument, fall back to the sample path this script has always used.
    mp3_path = sys.argv[1] if len(sys.argv) > 1 else "audio/002/2.mp3"
    print(mp3_to_text(mp3_path))
diff --git a/Pipfile b/Pipfile
 [[source]]
 name = "pypi"
 url = "https://pypi.org/simple"
 verify_ssl = true

 [dev-packages]

 [packages]
 # mp3_to_text.py imports google.cloud.speech.enums and .types, which are
 # only shipped by the 1.x releases of the client library, so stay below 2.0.
 google-cloud-speech = "<2.0"
 pydub = "*"

 [requires]
 python_version = "3.7"
	import os
	import re
	import wave
	import tempfile
	from pydub import AudioSegment
	from google.cloud import speech
	from google.cloud.speech import enums
	from google.cloud.speech import types

	# Temporary directory whose path mp3_to_wav() uses as the prefix for the
	# wav files it creates. It is created as soon as this module is imported;
	# nothing visible in this module deletes it afterwards.
	WORK_DIR = tempfile.mkdtemp()

	def mp3_to_text(fpath):
	    """
	    Transcribe an mp3 file with the GCP Speech-to-Text API

	    The mp3 is converted to a wav file by mp3_to_wav() and that wav file
	    is then handed to wav_to_text().

	    Params:
	        (str) fpath: path to the mp3 file
	    Returns:
	        (str[]) recognized texts
	    """
	    wav_path = mp3_to_wav(fpath)
	    return wav_to_text(wav_path)


	def wav_to_text(fpath):
	    """
	    Transcribe a wav file with the GCP Speech-to-Text API

	    The whole file is read into memory and sent in a single recognize()
	    request, declared as LINEAR16 audio in US English with the sample
	    rate taken from the file itself.

	    Params:
	        (str) fpath: path to the wav file
	    Returns:
	        (str[]) transcript of the first alternative of every result
	    """
	    frame_rate = get_frame_rate(fpath)

	    client = speech.SpeechClient()
	    with open(fpath, 'rb') as fp:
	        audio = types.RecognitionAudio(content=fp.read())

	    config = types.RecognitionConfig(
	        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
	        sample_rate_hertz=frame_rate,
	        language_code='en-US'
	    )

	    response = client.recognize(config, audio)

	    # Only the first alternative of each result is kept.
	    return [result.alternatives[0].transcript for result in response.results]


	def get_frame_rate(fpath):
	    """
	    Return the sampling frequency the wave module reports for a wav file

	    Params:
	        (str) fpath: path to a wav file
	    Returns:
	        (int) the frame rate
	    """
	    with wave.open(fpath, 'rb') as fp:
	        return fp.getframerate()


	def mp3_to_wav(fpath):
	    """
	    Convert an mp3 file to a mono wav file created inside WORK_DIR

	    Params:
	        (str) fpath: path to the mp3 file
	    Returns:
	        (str) Created wav file path
	    """
	    # Replace the extension whatever its case ("a.MP3" -> "a.wav") so the
	    # exported file always carries a .wav name.
	    stem, _ = os.path.splitext(os.path.basename(fpath))
	    # Join with a path separator: mkdtemp() returns a path without a
	    # trailing slash, so plain string concatenation would create the file
	    # next to WORK_DIR instead of inside it.
	    new_fpath = os.path.join(WORK_DIR, stem + ".wav")

	    sound = AudioSegment.from_mp3(fpath)
	    # Down-mix to a single channel before exporting.
	    sound = sound.set_channels(1)
	    sound.export(new_fpath, format="wav", parameters=["-ac", "1"])
	    return new_fpath


	if __name__ == "__main__":
	    import sys

	    # Transcribe the mp3 named by the first command-line argument; with no
	    # argument, fall back to the sample path this script has always used.
	    mp3_path = sys.argv[1] if len(sys.argv) > 1 else "audio/002/2.mp3"
	    print(mp3_to_text(mp3_path))
	[[source]]
	name = "pypi"
	url = "https://pypi.org/simple"
	verify_ssl = true

	[dev-packages]

	[packages]
	# mp3_to_text.py imports google.cloud.speech.enums and .types, which are
	# only shipped by the 1.x releases of the client library, so stay below 2.0.
	google-cloud-speech = "<2.0"
	pydub = "*"

	[requires]
	python_version = "3.7"