tuxedocat · July 13, 2013 12:38
diff --git a/get_rsmtcs.py b/get_rsmtcs.py
 from collections import defaultdict

 def readfile(filepath):
    """
    Read a whitespace-delimited text file (.e2f, .f2e) line by line.

    parameters
    ----------
    filepath: str
        path of the file to open in text mode

    returns
    -------
    rows: list of lists
        one entry per line of the file, each being that line's
        whitespace-separated fields (a blank line gives an empty list)
        if .e2f, ["english word", "f word", "probability"]
    """
    rows = []
    with open(filepath, "r") as handle:
        for line in handle:
            rows.append(line.split())
    return rows

 def get_vocabulary_from_txt(filepath):
    """
    Create a vocabulary list from a text file with one word per line.

    parameters
    ----------
    filepath: str
        path of the vocabulary file

    returns
    -------
    vocabulary: list
        ["get", "take", ...]
        words in file order, with surrounding whitespace (including the
        trailing newline) removed; whitespace-only lines are skipped
    """
    vocabulary = []
    with open(filepath, "r") as f:
        for line in f:
            # Strip the newline so entries compare equal to the tokens
            # produced by str.split() elsewhere in this module.
            word = line.strip()
            if word:
                vocabulary.append(word)
    return vocabulary

 def make_parallel_dic(e2f, f2e, vocabulary):
    """
    Chain e2f and f2e rows through their shared f word.

    An e2f row is kept when its first field is in the vocabulary; an f2e
    row is kept when its second field is in the vocabulary. Every kept
    e2f row is paired with each kept f2e row whose first field equals the
    e2f row's second field, and the two probabilities are multiplied.

    parameters
    ----------
    e2f: list of lists
        rows of ["english word", "f word", "probability"]
    f2e: list of lists
        rows used here as ["f word", "english word", "probability"]
    vocabulary: list
        english words to keep

    returns
    -------
    pdic: dictionary of lists
        key: english word, value: list of translated words as
        (english word, probability product) tuples; a key is present
        only when at least one pairing was found
    """
    # Membership is tested once per row, so build the set up front.
    vocab = set(vocabulary)
    e2fd = defaultdict(list)
    f2ed = defaultdict(list)
    for row in e2f:
        # Empty rows come from blank lines in the input file; skip them.
        if row and row[0] in vocab:
            e2fd[row[0]].append((row[1], float(row[2])))
    for row in f2e:
        if row and row[1] in vocab:
            f2ed[row[0]].append((row[1], float(row[2])))
    pdic = defaultdict(list)
    # items() instead of iteritems() so this runs on Python 2 and 3.
    for word, flist in e2fd.items():
        for fword, e2f_prob in flist:
            # .get() avoids inserting empty entries into the defaultdict.
            for back_word, f2e_prob in f2ed.get(fword, ()):
                pdic[word].append((back_word, f2e_prob * e2f_prob))
    return pdic

 def expand(wt, w2wd):
    """
    Look up a weighted word and rescale its targets by that weight.

    parameters
    ----------
    wt: tuple
        (word, weight)
    w2wd: dictionary of lists
        key: word, value: list of (target word, weight) tuples

    returns
    -------
    res: list
        (target word, target weight * wt weight) for every target of the
        word; empty when the word is missing or has no targets
    """
    key = wt[0]
    # Test membership first so a defaultdict does not gain a new key.
    if key not in w2wd:
        return []
    targets = w2wd[key]
    if not targets:
        return []
    return [(target[0], target[1] * wt[1]) for target in targets]

 def postprocess(pd, voc):
    """
    Merge duplicate candidates and rank them for each vocabulary word.

    parameters
    ----------
    pd: dictionary of lists
        key: word, value: list of (candidate, score) tuples
    voc: list
        words to keep, both as keys and as candidates

    returns
    -------
    pd2: dictionary of lists
        key: word from pd that is in voc, value: list of
        (candidate, summed score) tuples restricted to voc and sorted by
        score in descending order
    """
    # Membership is tested for every key and candidate; build the set once.
    vocab = set(voc)
    pd2 = defaultdict(list)
    # items() instead of iteritems() so this runs on Python 2 and 3.
    for word, candidates in pd.items():
        if word not in vocab:
            continue
        totals = defaultdict(float)
        for cand in candidates:
            if cand[0] in vocab:
                # The same candidate can occur several times; add scores up.
                totals[cand[0]] += cand[1]
        pd2[word] = sorted(totals.items(), key=lambda x: x[1], reverse=True)
    return pd2


 def combine_mtcs(mtcs, lccs):
    """
    Merge two candidate dictionaries, listing lccs candidates first.

    parameters
    ----------
    mtcs: dictionary of lists
        key: word, value: list of (candidate, score) tuples
    lccs: dictionary of lists
        key: word, value: list of (candidate, score) tuples; each score
        is passed through float()

    returns
    -------
    ccs: dictionary of lists
        for keys in lccs: the lccs candidates with 2 added to each score,
        followed by the mtcs candidates whose name is not already listed,
        sorted by score in descending order
        for keys only in mtcs: a copy of the mtcs list, order unchanged
    """
    ccs = defaultdict(list)
    # items() instead of iteritems() so this runs on Python 2 and 3.
    for k, ls in lccs.items():
        # NOTE(review): the +2 offset presumably keeps lccs scores above
        # the mtcs ones -- confirm the intended score range.
        ccs[k] = [(t[0], 2 + float(t[1])) for t in ls]
    for k, ls in mtcs.items():
        if k not in ccs:
            # Copy so the result does not share a list with the input.
            ccs[k] = list(ls)
        else:
            # Collect the names already present once, not once per candidate.
            seen = {tc[0] for tc in ccs[k]}
            extra = [t for t in ls if t[0] not in seen]
            ccs[k] += sorted(extra, key=lambda x: x[1], reverse=True)
    return ccs
	from collections import defaultdict

	def readfile(filepath):
	    """
	    A function for reading .e2f, .f2e files

	    parameters
	    ----------
	    filepath: str
	        path of the file to open in text mode

	    returns
	    -------
	    rows: list of lists
	        one entry per line, each being that line's whitespace-separated
	        fields (a blank line gives an empty list)
	        if .e2f, ["english word", "f word", "probability"]
	    """
	    with open(filepath, "r") as f:
	        # Iterate the file directly instead of materialising readlines().
	        rows = [line.split() for line in f]
	    return rows

	def get_vocabulary_from_txt(filepath):
	    """
	    A function for creating vocabulary list

	    parameters
	    ----------
	    filepath: str
	        path of a text file with one word per line

	    returns
	    -------
	    vocabulary: list
	        ["get", "take", ...]
	        words in file order with surrounding whitespace removed;
	        whitespace-only lines are skipped
	    """
	    with open(filepath, "r") as f:
	        # Strip each line so the trailing newline is not part of the word.
	        stripped = [line.strip() for line in f]
	    return [word for word in stripped if word]

	def make_parallel_dic(e2f, f2e, vocabulary):
	    """
	    Chain e2f and f2e rows through their shared f word.

	    parameters
	    ----------
	    e2f: list of lists
	        rows of ["english word", "f word", "probability"]; kept when
	        the english word is in the vocabulary
	    f2e: list of lists
	        rows used here as ["f word", "english word", "probability"];
	        kept when the english word is in the vocabulary
	    vocabulary: list
	        english words to keep

	    returns
	    -------
	    pdic: dictionary of lists
	        key: english word, value: list of translated words as
	        (english word, product of the two probabilities) tuples;
	        a key is present only when at least one pairing was found
	    """
	    # Membership is tested once per row, so build the set up front.
	    vocab = set(vocabulary)
	    e2fd = defaultdict(list)
	    f2ed = defaultdict(list)
	    for row in e2f:
	        # Empty rows come from blank lines in the input file; skip them.
	        if row and row[0] in vocab:
	            e2fd[row[0]].append((row[1], float(row[2])))
	    for row in f2e:
	        if row and row[1] in vocab:
	            f2ed[row[0]].append((row[1], float(row[2])))
	    pdic = defaultdict(list)
	    # items() instead of iteritems() so this runs on Python 2 and 3.
	    for word, flist in e2fd.items():
	        for fword, e2f_prob in flist:
	            # .get() avoids inserting empty entries into the defaultdict.
	            for back_word, f2e_prob in f2ed.get(fword, ()):
	                pdic[word].append((back_word, f2e_prob * e2f_prob))
	    return pdic

	def expand(wt, w2wd):
	    """
	    Look up a weighted word and rescale its targets by that weight.

	    parameters
	    ----------
	    wt: tuple
	        (word, weight)
	    w2wd: dictionary of lists
	        key: word, value: list of (target word, weight) tuples

	    returns
	    -------
	    res: list
	        (target word, target weight * wt weight) for every target of
	        the word; empty when the word is missing or has no targets
	    """
	    # Membership test first so a defaultdict does not gain a new key.
	    tl = w2wd[wt[0]] if wt[0] in w2wd else None
	    res = []
	    if tl:
	        for t in tl:
	            res.append((t[0], t[1] * wt[1]))
	    return res

	def postprocess(pd, voc):
	    """
	    Merge duplicate candidates and rank them for each vocabulary word.

	    parameters
	    ----------
	    pd: dictionary of lists
	        key: word, value: list of (candidate, score) tuples
	    voc: list
	        words to keep, both as keys and as candidates

	    returns
	    -------
	    pd2: dictionary of lists
	        key: word from pd that is in voc, value: list of
	        (candidate, summed score) tuples restricted to voc and sorted
	        by score in descending order
	    """
	    # Membership is tested for every key and candidate; build the set once.
	    vocab = set(voc)
	    pd2 = defaultdict(list)
	    # items() instead of iteritems() so this runs on Python 2 and 3.
	    for v, cl in pd.items():
	        if v in vocab:
	            _td = defaultdict(float)
	            for t in cl:
	                if t[0] in vocab:
	                    # Repeated candidates have their scores added up.
	                    _td[t[0]] += t[1]
	            pd2[v] = sorted(_td.items(), key=lambda x: x[1], reverse=True)
	    return pd2


	def combine_mtcs(mtcs, lccs):
	    """
	    Merge two candidate dictionaries, listing lccs candidates first.

	    parameters
	    ----------
	    mtcs: dictionary of lists
	        key: word, value: list of (candidate, score) tuples
	    lccs: dictionary of lists
	        key: word, value: list of (candidate, score) tuples; each score
	        is passed through float()

	    returns
	    -------
	    ccs: dictionary of lists
	        for keys in lccs: the lccs candidates with 2 added to each
	        score, followed by the mtcs candidates whose name is not
	        already listed, sorted by score in descending order
	        for keys only in mtcs: a copy of the mtcs list, order unchanged
	    """
	    ccs = defaultdict(list)
	    # items() instead of iteritems() so this runs on Python 2 and 3.
	    for k, ls in lccs.items():
	        # NOTE(review): the +2 offset presumably keeps lccs scores above
	        # the mtcs ones -- confirm the intended score range.
	        ccs[k] = [(t[0], 2 + float(t[1])) for t in ls]
	    for k, ls in mtcs.items():
	        if k not in ccs:
	            # Copy so the result does not share a list with the input.
	            ccs[k] = list(ls)
	        else:
	            # Collect the names already present once, not per candidate.
	            seen = {tc[0] for tc in ccs[k]}
	            _t = [t for t in ls if t[0] not in seen]
	            ccs[k] += sorted(_t, key=lambda x: x[1], reverse=True)
	    return ccs