# Gist alumae/854330 -- created March 4, 2011 08:24.
# NOTE: GitHub flagged this file as containing hidden or bidirectional Unicode
# text that may be interpreted or compiled differently than what appears below.
# To review, open the file in an editor that reveals hidden Unicode characters.
# GStreamer bindings (provide the Gst namespace used below).
require 'gst'
# Provides Queue on older Rubies.
require 'thread'

# Initialise GStreamer once, before any pipeline is constructed.
Gst.init
# Streaming speech recognizer built around a GStreamer pipeline that feeds
# audio pushed through an `appsrc` element into a `pocketsphinx` element.
#
# Typical use, per utterance:
#   rec.clear
#   rec.feed_data(chunk)   # repeatedly
#   rec.feed_end
#   text = rec.wait_final_result
class Recognizer
  # Current (possibly partial) recognition result, as last reported by the
  # ASR element. Empty string until the first hypothesis arrives.
  attr_reader :result

  # Builds the pipeline, declares the input audio format, and hooks up the
  # ASR result signals.
  def initialize
    @result = ""
    # Used by the 'result' handler to signal that an utterance is finished;
    # created up front so it exists before any handler is connected.
    @queue = Queue.new

    # construct pipeline (the pipeline ends in a filesink with location=file.wav)
    @pipeline = Gst::Parse.launch("appsrc name=appsrc ! audioconvert ! audioresample ! pocketsphinx name=asr ! filesink location=file.wav")

    # define input audio properties: 16 kHz, 1 channel, signed 16-bit samples
    @appsrc = @pipeline.get_child("appsrc")
    caps = Gst::Caps.parse("audio/x-raw-int,rate=16000,channels=1,signed=true,endianness=1234,depth=16,width=16")
    @appsrc.set_property("caps", caps)

    # define behaviour for ASR output
    asr = @pipeline.get_child("asr")
    # Intermediate hypothesis: just remember the latest text.
    asr.signal_connect('partial_result') do |_asr, text, _uttid|
      @result = text
    end
    # Final hypothesis: remember it and wake up wait_final_result.
    asr.signal_connect('result') do |_asr, text, _uttid|
      @result = text
      @queue.push(1)
    end

    # This returns when ASR engine has been fully loaded
    asr.set_property('configured', true)
  end

  # Call this before starting a new recognition.
  # Resets the stored result, drops any pending completion notifications and
  # pauses the pipeline.
  def clear
    @result = ""
    @queue.clear
    @pipeline.pause
  end

  # Feed new chunk of audio data to the recognizer.
  #
  # data - the audio bytes to push; presumably must match the caps declared
  #        in #initialize (verify against caller).
  def feed_data(data)
    # Make sure the pipeline is running before pushing; called on every chunk.
    @pipeline.play
    buffer = Gst::Buffer.new
    buffer.data = data
    @appsrc.push_buffer(buffer)
  end

  # Notify recognizer of utterance end by signalling end-of-stream on appsrc.
  def feed_end
    @appsrc.end_of_stream
  end

  # Wait for the recognizer to recognize the current utterance.
  # Blocks until the 'result' handler has pushed a completion token, then
  # stops the pipeline.
  #
  # Returns the final recognition result.
  def wait_final_result
    @queue.pop
    @pipeline.stop
    @result
  end
end
# Demo driver: runs each raw audio file through one Recognizer instance,
# printing the intermediate hypothesis after every chunk and the final
# result once the utterance has been recognized.
rec = Recognizer.new
files = [
  "/home/tanel/devel/cmusphinx/trunk/pocketsphinx/test/data/goforward.raw",
  "/home/tanel/devel/cmusphinx/trunk/pocketsphinx/test/data/something.raw"
]
files.each do |path|
  rec.clear
  # The input is raw PCM, so open it in binary mode to avoid any newline or
  # encoding translation of the bytes.
  File.open(path, "rb") do |audio|
    # read data in chunks and feed to recognizer
    # (2 * 16000 bytes per chunk; with the 16 kHz / 16-bit / 1-channel caps
    # declared by Recognizer that is one second of audio)
    while (chunk = audio.read(2 * 16000))
      puts "."
      rec.feed_data(chunk)
      # this is not really needed, it simulates time delay from real audio stream
      sleep(1)
      puts "Current result: #{rec.result}"
    end
  end
  rec.feed_end
  result = rec.wait_final_result
  puts "OK, recognition done: #{result}"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment