Created
October 6, 2010 17:57
-
-
Save nishimotz/613786 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# julius4itbc2proxy.py
# encoding: utf-8
# tested on Windows XP / Python 2.6
# based on https://gist.github.com/9f927195fa3df2228160
# 2010-10-07 by nishimotz http://ja.nishimotz.com
#
# Julius4 -(port:10501)- proxy.py -(port:10500)- ITBC2
#
# [module.conf] example
#
# -AM am0
# -h model/phone_m/hmmdefs_ptm_gid.binhmm
# -hlist model/phone_m/logicalTri
#
# -LM lm0
# -d model/lang_m/web.60k.8-8.bingramv4.gz
# -v model/lang_m/web.60k.htkdic
# -SR search0 am0 lm0
#
# -LM lm1
# -d ../julius-lm/newlm.bingram
# -v ../julius-lm/newlm.htkdic
# -SR search1 am0 lm1
#
# -GLOBAL
# -gmm gmmdefs.binhmm
# -gmmreject "noise,laugh,cough"
# -input mic
# -rejectshort 300
# -lv 800
# -headmargin 200
# -tailmargin 300
# -zc 60
# -charconv euc sjis
# -outcode WLPSC
# -module 10501
import socket | |
import threading | |
import time | |
import subprocess | |
import os | |
import re | |
import sys | |
import SocketServer | |
from xml.dom import minidom | |
class RecogManager:
    """Client side of the socket to the recognizer on 127.0.0.1:10501.

    Messages on this connection are delimited by the two-character
    sequence ".\\n"; receive() splits the incoming stream on it.
    """

    # Becomes True (per instance) once receive() has seen the peer close
    # the connection, so that polling loops can stop instead of spinning.
    closed = False

    def __init__(self):
        self.s = socket.socket()
        # Fixed two-second pause before the caller goes on to connect().
        # NOTE(review): presumably gives the engine time to open its
        # module port -- confirm.
        time.sleep(2)

    def connect(self):
        """Connect to the recognizer; return True on success, else False."""
        addr = ('127.0.0.1', 10501)
        try:
            self.s.connect(addr)
        except socket.error:
            return False
        return True

    def send(self, msg):
        """Send *msg* followed by a carriage return.

        Socket failures are reported on stdout as "send error" and are
        not raised.
        """
        print("sending: " + msg)
        try:
            # receive() leaves the socket non-blocking; writes are blocking.
            self.s.setblocking(1)
            # sendall() retries partial writes, which plain send() does not.
            self.s.sendall(msg + "\r")
        except socket.error:
            print("send error")

    def receive(self):
        """Poll the socket and return the complete messages read so far.

        Reads in 100-byte chunks every 10 ms until the accumulated data
        ends with the ".\\n" terminator, then returns the non-empty
        messages (terminators removed) as a list of strings.

        If the peer closes the connection, ``self.closed`` is set to True
        and the messages completed so far are returned (possibly an empty
        list); a truncated trailing message is discarded.
        """
        buf = ''
        while True:
            time.sleep(0.01)
            self.s.setblocking(0)
            try:
                chunk = self.s.recv(100)
            except socket.error:
                # Nothing to read yet on the non-blocking socket: poll again.
                chunk = ''
            else:
                if not chunk:
                    # A successful empty read means the peer closed.
                    self.closed = True
            # NOTE(review): as written these two substitutions replace a
            # tag with itself (no-ops). Presumably the replacement text was
            # meant to be an escaped form of the tag -- confirm against the
            # original file before changing them.
            chunk = re.sub(r"\<s\>", "<s>", chunk)
            chunk = re.sub(r"\</s\>", "</s>", chunk)
            buf += chunk
            if buf.endswith(".\n") or self.closed:
                break
        # Everything before the last terminator is complete; the final
        # piece is either empty or a truncated message (only on close).
        return [m for m in buf.split(".\n")[:-1] if m]

    def wait_for_elem(self, name):
        """Block until a message whose root element is *name* arrives.

        Each received message is parsed with minidom; messages that are
        not well-formed XML are skipped. Returns the parsed document, or
        None if the connection is closed before a match is seen.
        """
        from xml.parsers.expat import ExpatError
        while True:
            for m in self.receive():
                try:
                    dom = minidom.parseString(m)
                except ExpatError:
                    continue
                if dom.firstChild.nodeName == name:
                    return dom
            if self.closed:
                return None
            time.sleep(0.01)

    def close(self):
        """Send 'DIE' to the recognizer and close the socket."""
        self.send('DIE')
        # Nothing in this class creates engine_thread; join it only when
        # some other code has attached one.
        engine_thread = getattr(self, 'engine_thread', None)
        if engine_thread is not None:
            engine_thread.join()
        return self.s.close()
class ProxyHandler(SocketServer.StreamRequestHandler):
    """Relay recognizer output to one connected client.

    Lines outside a RECOGOUT element are forwarded unchanged. RECOGOUT
    elements are collected in groups of two and only the one whose rank-1
    SHYPO has the higher SCORE is forwarded, with its opening tag replaced
    by a bare ``<RECOGOUT>``. Every forwarded line is sent with ".\\n"
    appended and echoed to stdout.
    """

    # Compiled once; all are applied with match(), i.e. anchored at the
    # start of the line.
    _RECOGOUT_OPEN = re.compile(r'<RECOGOUT[^>]+>')
    _RECOGOUT_CLOSE = re.compile(r'</RECOGOUT>')
    _TOP_HYPOTHESIS = re.compile(r'\s+<SHYPO RANK="1" SCORE="([^"]+)">')

    # Score assumed for a RECOGOUT that has no rank-1 SHYPO line.
    _MIN_SCORE = -9999999999.9
    # Number of RECOGOUT elements compared before one is forwarded.
    _RECOGOUTS_PER_GROUP = 2

    def _init_recog(self):
        """Create the recognizer connection; exit if it cannot connect."""
        self.reco = RecogManager()
        if not self.reco.connect():
            sys.exit()

    def _close_recog(self):
        """Close the recognizer connection opened by _init_recog()."""
        self.reco.close()

    def _send_line(self, line):
        """Send one line to the client with ".\\n" appended and echo it."""
        self.request.sendall(line + ".\n")
        print(line)

    def _forward_batch(self, messages):
        """Filter one batch of recognizer messages and forward the result.

        NOTE(review): selection state is local to a batch, so two RECOGOUT
        elements that arrive in different receive() batches are never
        compared with each other. Presumably both normally arrive in one
        batch -- confirm.
        """
        best_lines = None
        best_score = self._MIN_SCORE
        score = self._MIN_SCORE
        inside = False  # currently between <RECOGOUT ...> and </RECOGOUT>?
        lines = []
        seen = 0
        for message in messages:
            for line in message.splitlines():
                if self._RECOGOUT_OPEN.match(line):
                    inside = True
                    lines = ['<RECOGOUT>']
                    continue
                top = self._TOP_HYPOTHESIS.match(line)
                if top:
                    score = float(top.group(1))
                    lines.append(line)
                elif self._RECOGOUT_CLOSE.match(line):
                    lines.append(line)
                    # The first RECOGOUT of a group is always a candidate,
                    # so a group without any score still has something to
                    # forward; later ones must score strictly higher.
                    if best_lines is None or score > best_score:
                        best_lines = lines
                        best_score = score
                    inside = False
                    score = self._MIN_SCORE
                    seen += 1
                    if seen == self._RECOGOUTS_PER_GROUP:
                        for kept in best_lines:
                            self._send_line(kept)
                        # Start the next group from a clean slate.
                        best_lines = None
                        best_score = self._MIN_SCORE
                        seen = 0
                elif inside:
                    lines.append(line)
                else:
                    self._send_line(line)

    def handle(self):
        """Serve one client until either side of the proxy goes away."""
        self._init_recog()
        print("connect from: %s" % (self.client_address,))
        try:
            while True:
                self._forward_batch(self.reco.receive())
                # Stop when the recognizer connection reports it was
                # closed; an object without that attribute never stops
                # the loop this way.
                if getattr(self.reco, 'closed', False):
                    break
                time.sleep(0.01)
        except socket.error:
            # Sending to the client failed.
            print("client disconnected: %s" % (self.client_address,))
        finally:
            self.request.close()
            self._close_recog()
def print_as_sjis(s):
    """Print *s* after encoding it with the 'SJIS' codec."""
    encoded = s.encode('SJIS')
    print(encoded)
def to_xml(dom):
    """Return ``dom.toxml()`` with the ``<?xml version="1.0" ?>`` header removed."""
    xml_declaration = r'<\?xml version="1\.0" \?>'
    serialized = dom.toxml()
    return re.sub(xml_declaration, "", serialized)
def print_dom(dom):
    """Serialize *dom* with to_xml() and print it with print_as_sjis()."""
    xml_text = to_xml(dom)
    print_as_sjis(xml_text)
if __name__ == '__main__':
    # Accept clients on port 10500 on all interfaces; each connection is
    # served by its own ProxyHandler thread.
    server = SocketServer.ThreadingTCPServer(('', 10500), ProxyHandler)
    print('listening: %s' % (server.socket.getsockname(),))
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("^C detected")
    finally:
        # Release the listening socket however serve_forever() ended, not
        # only on a normal return or Ctrl-C.
        server.server_close()
        print("bye")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment