Skip to content

Instantly share code, notes, and snippets.

@alumae
Created March 4, 2011 08:24
Show Gist options
  • Save alumae/854330 to your computer and use it in GitHub Desktop.
require 'gst'
require 'thread'
Gst.init
class Recognizer
def initialize()
@result = ""
# construct pipeline
@pipeline = Gst::Parse.launch("appsrc name=appsrc ! audioconvert ! audioresample ! pocketsphinx name=asr ! filesink location=file.wav")
# define input audio properties
@appsrc = @pipeline.get_child("appsrc")
caps = Gst::Caps.parse("audio/x-raw-int,rate=16000,channels=1,signed=true,endianness=1234,depth=16,width=16")
@appsrc.set_property("caps", caps)
# define behaviour for ASR output
asr = @pipeline.get_child("asr")
asr.signal_connect('partial_result') { |asr, text, uttid|
#puts "PARTIAL: " + text
@result = text
}
asr.signal_connect('result') { |asr, text, uttid|
#puts "FINAL: " + text
@result = text
@queue.push(1)
}
@queue = Queue.new
# This returns when ASR engine has been fully loaded
asr.set_property('configured', true)
#@pipeline.pause
end
# Get current (possibly partial) recognition result
def result
@result
end
# Call this before starting a new recognition
def clear()
@result = ""
@queue.clear
@pipeline.pause
end
# Feed new chunk of audio data to the recognizer
def feed_data(data)
@pipeline.play
buffer=Gst::Buffer.new
buffer.data = data
@appsrc.push_buffer(buffer)
end
# Notify recognizer of utterance end
def feed_end()
@appsrc.end_of_stream()
end
# Wait for the recognizer to recognize the current utterance
# Returns the final recognition result
def wait_final_result()
@queue.pop
@pipeline.stop
return @result
end
end
rec = Recognizer.new
files = ["/home/tanel/devel/cmusphinx/trunk/pocketsphinx/test/data/goforward.raw", "/home/tanel/devel/cmusphinx/trunk/pocketsphinx/test/data/something.raw"]
files.each { |f|
rec.clear()
File.open(f, "r") { |f|
# read data in chunks and feed to recognizer
while buff = f.read(2*16000)
puts "."
rec.feed_data(buff)
# this is not really needed, it simulates time delay from real audio stream
sleep(1)
puts "Current result: #{rec.result}"
end
}
rec.feed_end()
result = rec.wait_final_result()
puts "OK, recognition done: " + result
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment