twobob · June 18, 2022 15:19 · twobob · Jun 18, 2022
diff --git a/frq.py b/frq.py
 from speech_recognition import *
 from os import walk, path
 import glob
 import wave
 import nltk
 import contextlib
 from pydub import AudioSegment
 from pydub.silence import split_on_silence
 import subprocess

 def split(filepath):
 	"""Load the audio at *filepath* and cut it apart at silent stretches.

 	The clip is handed to ``split_on_silence`` with a minimum silence
 	length of 500, a threshold 16 below the clip's own ``dBFS`` value and
 	``keep_silence`` set to 250.

 	Returns:
 		Whatever ``split_on_silence`` returns for the loaded clip.
 	"""
 	audio = AudioSegment.from_file(filepath)
 	quiet_level = audio.dBFS - 16
 	return split_on_silence(
 		audio,
 		min_silence_len=500,
 		silence_thresh=quiet_level,
 		keep_silence=250,  # optional
 	)

 def recognize(audio_file):
 	"""Transcribe a whole audio file with the Google recognizer.

 	Args:
 		audio_file: anything ``AudioFile`` accepts; ``main`` passes the
 			path of a .wav file.

 	Returns:
 		The recognized text, or the literal string "No speech detected"
 		when recognition fails for any reason.
 	"""
 	r = Recognizer()
 	with AudioFile(audio_file) as source:
 		audio = r.record(source)
 	# The recording is already in memory, so the source file can be closed
 	# before the (slow) recognition request is made.
 	try:
 		return r.recognize_google(audio)
 	except Exception:
 		# Best-effort by design, but unlike a bare ``except:`` this no
 		# longer swallows KeyboardInterrupt / SystemExit.
 		return "No speech detected"

 def recognize_chunks(chunk):
 	"""Transcribe one in-memory audio chunk via a temporary .wav file.

 	The chunk is padded with 500 ms of silence on both sides, exported to
 	``.//tempchunk.wav`` (overwritten on every call) and passed to the
 	Google recognizer.

 	Args:
 		chunk: an audio segment that can be concatenated with
 			``AudioSegment.silent(...)``; ``main`` passes the pieces
 			produced by ``split``.

 	Returns:
 		The recognized text, or the literal string "No speech detected"
 		when recognition fails for any reason.
 	"""
 	temp_path = ".//tempchunk.wav"

 	# Create a silence chunk that's 0.5 seconds (or 500 ms) long and add it
 	# to the beginning and end of the chunk.
 	silence_chunk = AudioSegment.silent(duration=500)
 	audio_chunk = silence_chunk + chunk + silence_chunk

 	# export() hands back the output file object; close it so the handle
 	# is not leaked (and so the file can be deleted later).
 	exported = audio_chunk.export(
 		temp_path,
 		bitrate="44100",
 		format="wav"
 	)
 	exported.close()

 	r = Recognizer()
 	with AudioFile(temp_path) as source:
 		audio = r.record(source)
 	try:
 		return r.recognize_google(audio)
 	except Exception:
 		# Best-effort by design, but unlike a bare ``except:`` this no
 		# longer swallows KeyboardInterrupt / SystemExit.
 		return "No speech detected"

 def main():
 	"""Transcribe every clip below the current directory and report word rates.

 	Steps:
 	  1. Convert each ``*.mp4`` found recursively to a mono 16 kHz ``.wav``
 	     with ffmpeg, unless that .wav already exists.
 	  2. For each ``.wav`` (except the ``tempchunk`` scratch file) determine
 	     its duration, obtain a transcript (from the ``<file>.txt`` cache or
 	     by speech recognition) and write ``<file>.txt`` /
 	     ``<file>.distribution.txt`` when they are missing.
 	  3. Write one summary row per transcribed clip to ``frequency.txt``.
 	"""
 	# Imported here so main() does not rely on these names leaking in
 	# through ``from speech_recognition import *``.
 	import os
 	import time

 	start = time.time()
 	root_dir = "./"
 	wav_suffix = ".wav"
 	all_words = {}
 	durations = {}
 	frequency = []

 	for filename in glob.iglob(root_dir + "**/*.mp4", recursive=True):
 		# Strip only the trailing extension, not every ".mp4" in the path.
 		target = filename[:-len(".mp4")] + wav_suffix
 		if not path.exists(target):
 			subprocess.call(['ffmpeg', '-i', filename, '-acodec',
 				'pcm_s16le', '-ac', '1', '-ar', '16000', target])

 	for filename in glob.iglob(root_dir + "**/*.wav", recursive=True):
 		if not filename.endswith(wav_suffix) or "tempchunk" in filename:
 			print(filename, 'skipped')
 			continue

 		duration = _clip_duration(filename)
 		durations[filename] = duration
 		print(filename, duration, "total time")
 		print(filename, 'processing')

 		if path.exists(filename + ".txt"):
 			words = _cached_words(filename)
 		else:
 			words = _transcribe(filename, duration)

 		all_words[filename] = words
 		if words != _NO_SPEECH:
 			# A failed recognition must not be cached: it would be read
 			# back as a real transcript on the next run.
 			_write_reports(filename, words)

 	for key, value in all_words.items():
 		if value != _NO_SPEECH:
 			frequency.append(
 				_summary_line(key[:-len(wav_suffix)], durations[key], value))
 		else:
 			print("No speech detected in {}".format(key))

 	with open("frequency.txt", "w") as o:
 		for i in frequency:
 			print(i + '\n')
 			o.write(i + '\n')

 	if path.exists("tempchunk.wav"):
 		os.remove("tempchunk.wav")

 	print(time.time() - start, "Total execution time")


 # Sentinel string returned by recognize() / recognize_chunks() on failure.
 _NO_SPEECH = "No speech detected"


 def _clip_duration(filename):
 	"""Return the length of a .wav file in seconds, cached in ``<file>.time.txt``."""
 	cache = filename + ".time.txt"
 	if path.exists(cache):
 		with open(cache, "r") as fh:
 			time_in_file = fh.readline()
 		print(time_in_file)
 		try:
 			return float(time_in_file)
 		except ValueError:
 			# Empty or corrupt cache file: measure the clip again below.
 			pass
 	with contextlib.closing(wave.open(filename, 'r')) as f:
 		duration = f.getnframes() / float(f.getframerate())
 	with open(cache, "w") as o:
 		o.write(str(duration) + '\n')
 	return duration


 def _cached_words(filename):
 	"""Read the saved ``<file>.txt`` transcript back as one space-separated string."""
 	with open(filename + ".txt", 'r') as fp:
 		lines = fp.readlines()
 	print('Total Lines', len(lines))
 	# The cache stores "word \n" per line; collapse that to single spaces so
 	# cached and freshly recognized transcripts are counted the same way.
 	return " ".join("".join(lines).split())


 def _transcribe(filename, duration):
 	"""Recognize speech in *filename*, working chunk by chunk for clips over 120 s."""
 	if duration <= 120:
 		return recognize(filename)

 	# Minimum chunk length: pieces are merged until they reach it.
 	target_length = 25 * 1000 # 25 seconds
 	print("total words so far in array from chunks: ", end='')

 	merged = []
 	for chunk in split(filename):
 		# ``merged`` may stay empty when split() yields nothing.
 		if merged and len(merged[-1]) < target_length:
 			merged[-1] += chunk
 		else:
 			merged.append(chunk)

 	pieces = []
 	word_count = 0
 	for chunk in merged:
 		text = recognize_chunks(chunk)
 		if text != _NO_SPEECH:
 			pieces.append(text)
 			word_count += len(text.split())
 		print(word_count, end=" ")
 	print()

 	# Join with a space so words at chunk boundaries are not glued together.
 	return " ".join(pieces) if pieces else _NO_SPEECH


 def _write_reports(filename, words):
 	"""Write ``<file>.txt`` and ``<file>.distribution.txt`` unless they already exist."""
 	tokens = words.split()
 	if not path.exists(filename + ".txt"):
 		with open(filename + ".txt", "w") as o:
 			o.writelines(w + ' \n' for w in tokens)
 	if not path.exists(filename + ".distribution.txt"):
 		# Build the distribution first so a failure leaves no empty file.
 		fdist = nltk.FreqDist(tokens)
 		with open(filename + ".distribution.txt", "w") as o:
 			# Drop only the trailing "wav"; the dot before it is kept.
 			o.write("word frequency report for " + filename[:-len("wav")] + "\n\n")
 			# Report the 150 most frequent words.
 			for w, count in fdist.most_common(150):
 				o.write(w.ljust(20) + str(count) + " \n")


 def _summary_line(name, duration, words):
 	"""Format one ``frequency.txt`` row for a transcribed clip."""
 	mins, secs = divmod(duration, 60)
 	total = len(words.split())
 	# Use the real duration so clips shorter than a minute do not divide by zero.
 	per_min = round(total * 60 / duration, 2) if duration > 0 else 0.0
 	return "{}: mins:{} secs:{} total words:{} words per min:{}".format(
 		name.ljust(90), str(int(mins)).ljust(2), str(int(secs)).ljust(2),
 		str(total).ljust(2), per_min)

 # Run the pipeline only when this file is executed as a script.
 if __name__ == "__main__":
 	main()
	from speech_recognition import *
	from os import walk, path
	import glob
	import wave
	import nltk
	import contextlib
	from pydub import AudioSegment
	from pydub.silence import split_on_silence
	import subprocess

	def split(filepath):
		"""Load the audio at *filepath* and cut it apart at silent stretches.

		The clip is handed to ``split_on_silence`` with a minimum silence
		length of 500, a threshold 16 below the clip's own ``dBFS`` value and
		``keep_silence`` set to 250.

		Returns:
			Whatever ``split_on_silence`` returns for the loaded clip.
		"""
		audio = AudioSegment.from_file(filepath)
		options = {
			"min_silence_len": 500,
			"silence_thresh": audio.dBFS - 16,
			"keep_silence": 250,  # optional
		}
		return split_on_silence(audio, **options)

	def recognize(audio_file):
		"""Transcribe a whole audio file with the Google recognizer.

		Args:
			audio_file: anything ``AudioFile`` accepts; ``main`` passes the
				path of a .wav file.

		Returns:
			The recognized text, or the literal string "No speech detected"
			when recognition fails for any reason.
		"""
		r = Recognizer()
		with AudioFile(audio_file) as source:
			audio = r.record(source)
		# The recording is already in memory, so the source file can be closed
		# before the (slow) recognition request is made.
		try:
			return r.recognize_google(audio)
		except Exception:
			# Best-effort by design, but unlike a bare ``except:`` this no
			# longer swallows KeyboardInterrupt / SystemExit.
			return "No speech detected"

	def recognize_chunks(chunk):
		"""Transcribe one in-memory audio chunk via a temporary .wav file.

		The chunk is padded with 500 ms of silence on both sides, exported to
		``.//tempchunk.wav`` (overwritten on every call) and passed to the
		Google recognizer.

		Args:
			chunk: an audio segment that can be concatenated with
				``AudioSegment.silent(...)``; ``main`` passes the pieces
				produced by ``split``.

		Returns:
			The recognized text, or the literal string "No speech detected"
			when recognition fails for any reason.
		"""
		temp_path = ".//tempchunk.wav"

		# Create a silence chunk that's 0.5 seconds (or 500 ms) long and add it
		# to the beginning and end of the chunk.
		silence_chunk = AudioSegment.silent(duration=500)
		audio_chunk = silence_chunk + chunk + silence_chunk

		# export() hands back the output file object; close it so the handle
		# is not leaked (and so the file can be deleted later).
		exported = audio_chunk.export(
			temp_path,
			bitrate="44100",
			format="wav"
		)
		exported.close()

		r = Recognizer()
		with AudioFile(temp_path) as source:
			audio = r.record(source)
		try:
			return r.recognize_google(audio)
		except Exception:
			# Best-effort by design, but unlike a bare ``except:`` this no
			# longer swallows KeyboardInterrupt / SystemExit.
			return "No speech detected"

	def main():
		"""Transcribe every clip below the current directory and report word rates.

		Steps:
		  1. Convert each ``*.mp4`` found recursively to a mono 16 kHz ``.wav``
		     with ffmpeg, unless that .wav already exists.
		  2. For each ``.wav`` (except the ``tempchunk`` scratch file) determine
		     its duration, obtain a transcript (from the ``<file>.txt`` cache or
		     by speech recognition) and write ``<file>.txt`` /
		     ``<file>.distribution.txt`` when they are missing.
		  3. Write one summary row per transcribed clip to ``frequency.txt``.
		"""
		# Imported here so main() does not rely on these names leaking in
		# through ``from speech_recognition import *``.
		import os
		import time

		start = time.time()
		root_dir = "./"
		wav_suffix = ".wav"
		all_words = {}
		durations = {}
		frequency = []

		# The recursive pattern is "**/*.mp4"; the pasted "*/.mp4" only matched
		# files literally named ".mp4" one directory down.
		for filename in glob.iglob(root_dir + "**/*.mp4", recursive=True):
			# Strip only the trailing extension, not every ".mp4" in the path.
			target = filename[:-len(".mp4")] + wav_suffix
			if not path.exists(target):
				subprocess.call(['ffmpeg', '-i', filename, '-acodec',
					'pcm_s16le', '-ac', '1', '-ar', '16000', target])

		for filename in glob.iglob(root_dir + "**/*.wav", recursive=True):
			if not filename.endswith(wav_suffix) or "tempchunk" in filename:
				print(filename, 'skipped')
				continue

			duration = _clip_duration(filename)
			durations[filename] = duration
			print(filename, duration, "total time")
			print(filename, 'processing')

			if path.exists(filename + ".txt"):
				words = _cached_words(filename)
			else:
				words = _transcribe(filename, duration)

			all_words[filename] = words
			if words != _NO_SPEECH:
				# A failed recognition must not be cached: it would be read
				# back as a real transcript on the next run.
				_write_reports(filename, words)

		for key, value in all_words.items():
			if value != _NO_SPEECH:
				frequency.append(
					_summary_line(key[:-len(wav_suffix)], durations[key], value))
			else:
				print("No speech detected in {}".format(key))

		with open("frequency.txt", "w") as o:
			for i in frequency:
				print(i + '\n')
				o.write(i + '\n')

		if path.exists("tempchunk.wav"):
			os.remove("tempchunk.wav")

		print(time.time() - start, "Total execution time")


	# Sentinel string returned by recognize() / recognize_chunks() on failure.
	_NO_SPEECH = "No speech detected"


	def _clip_duration(filename):
		"""Return the length of a .wav file in seconds, cached in ``<file>.time.txt``."""
		cache = filename + ".time.txt"
		if path.exists(cache):
			with open(cache, "r") as fh:
				time_in_file = fh.readline()
			print(time_in_file)
			try:
				return float(time_in_file)
			except ValueError:
				# Empty or corrupt cache file: measure the clip again below.
				pass
		with contextlib.closing(wave.open(filename, 'r')) as f:
			duration = f.getnframes() / float(f.getframerate())
		with open(cache, "w") as o:
			o.write(str(duration) + '\n')
		return duration


	def _cached_words(filename):
		"""Read the saved ``<file>.txt`` transcript back as one space-separated string."""
		with open(filename + ".txt", 'r') as fp:
			lines = fp.readlines()
		print('Total Lines', len(lines))
		# The cache stores "word \n" per line; collapse that to single spaces so
		# cached and freshly recognized transcripts are counted the same way.
		return " ".join("".join(lines).split())


	def _transcribe(filename, duration):
		"""Recognize speech in *filename*, working chunk by chunk for clips over 120 s."""
		if duration <= 120:
			return recognize(filename)

		# Minimum chunk length: pieces are merged until they reach it.
		target_length = 25 * 1000 # 25 seconds
		print("total words so far in array from chunks: ", end='')

		merged = []
		for chunk in split(filename):
			# ``merged`` may stay empty when split() yields nothing.
			if merged and len(merged[-1]) < target_length:
				merged[-1] += chunk
			else:
				merged.append(chunk)

		pieces = []
		word_count = 0
		for chunk in merged:
			text = recognize_chunks(chunk)
			if text != _NO_SPEECH:
				pieces.append(text)
				word_count += len(text.split())
			print(word_count, end=" ")
		print()

		# Join with a space so words at chunk boundaries are not glued together.
		return " ".join(pieces) if pieces else _NO_SPEECH


	def _write_reports(filename, words):
		"""Write ``<file>.txt`` and ``<file>.distribution.txt`` unless they already exist."""
		tokens = words.split()
		if not path.exists(filename + ".txt"):
			with open(filename + ".txt", "w") as o:
				o.writelines(w + ' \n' for w in tokens)
		if not path.exists(filename + ".distribution.txt"):
			# Build the distribution first so a failure leaves no empty file.
			fdist = nltk.FreqDist(tokens)
			with open(filename + ".distribution.txt", "w") as o:
				# Drop only the trailing "wav"; the dot before it is kept.
				o.write("word frequency report for " + filename[:-len("wav")] + "\n\n")
				# Report the 150 most frequent words.
				for w, count in fdist.most_common(150):
					o.write(w.ljust(20) + str(count) + " \n")


	def _summary_line(name, duration, words):
		"""Format one ``frequency.txt`` row for a transcribed clip."""
		mins, secs = divmod(duration, 60)
		total = len(words.split())
		# Use the real duration so clips shorter than a minute do not divide by zero.
		per_min = round(total * 60 / duration, 2) if duration > 0 else 0.0
		return "{}: mins:{} secs:{} total words:{} words per min:{}".format(
			name.ljust(90), str(int(mins)).ljust(2), str(int(secs)).ljust(2),
			str(total).ljust(2), per_min)

	# Run the pipeline only when this file is executed as a script.
	if __name__ == "__main__":
		main()