Skip to content

Instantly share code, notes, and snippets.

@josephcc
Created April 27, 2014 02:54
Show Gist options
  • Select an option

  • Save josephcc/11336591 to your computer and use it in GitHub Desktop.

Select an option

Save josephcc/11336591 to your computer and use it in GitHub Desktop.
import string
import operator
def Y(x):
    # Maps index x to 2*x + 1.
    # NOTE(review): looks like the "yes" child slot of an array-backed
    # binary tree -- confirm against the code that calls it.
    return 2 * x + 1


def N(x):
    # Maps index x to 2*x + 2.
    # NOTE(review): looks like the "no" child slot of an array-backed
    # binary tree -- confirm against the code that calls it.
    return 2 * x + 2


# Names of the tag files; presumably training and held-out data (verify).
train_file = 'hw6-WSJ-1.tags'
test_file = 'hw6-WSJ-2.tags'

# Sentence start / end marker strings.
SS = '<S>'
ES = '</S>'
def corpus_reader(ifile):
    """Yield one (history, token) event for every token in a file.

    Each line of ``ifile`` is split on whitespace.  For every position in a
    line the generator yields a 2-tuple made of the list of tokens that
    precede that position on the same line and the token at that position.
    Blank lines produce no events.

    Args:
        ifile: path of the text file to read.

    Yields:
        ``(history, token)`` where ``history`` is a new list on every event.
    """
    # The context manager closes the file once the generator is exhausted
    # or discarded; the previous version left the handle open.
    with open(ifile) as handle:
        for line in handle:
            tokens = line.split()
            # NOTE: sentence boundary markers are currently disabled:
            # tokens = [SS] + tokens + [ES]
            for idx, token in enumerate(tokens):
                yield (tokens[:idx], token)
def filter2(f, l):
    """Partition ``l`` by the predicate ``f``.

    Returns a pair of lists ``(accepted, rejected)``: items for which
    ``f(item)`` is truthy and the remaining items, each in input order.
    ``f`` is called exactly once per item.
    """
    accepted, rejected = [], []
    for element in l:
        bucket = accepted if f(element) else rejected
        bucket.append(element)
    return accepted, rejected
def all_tags(ifile):
    """Collect the distinct whitespace-separated tokens of a file.

    The tokens are split into two lists by length: tokens that are exactly
    one character long (named ``punc`` here) and all other tokens (``pos``).

    Args:
        ifile: path of the text file to read.

    Returns:
        ``(pos, punc)`` -- two lists of unique tokens.  Their order follows
        set iteration order and is therefore not specified.  An empty file
        gives ``([], [])``.
    """
    tags = set()
    # ``with`` closes the handle; str methods replace the Python-2-only
    # ``string.strip`` / ``string.split`` / builtin ``reduce`` chain, which
    # also raised TypeError when the file had no lines.
    with open(ifile) as handle:
        for line in handle:
            tags.update(line.split())
    # Iterate the same set twice so both lists share one relative ordering.
    punc = [tag for tag in tags if len(tag) == 1]
    pos = [tag for tag in tags if len(tag) != 1]
    return pos, punc
def space_joiner(x):
    # Concatenate the strings in x with a single space between them.
    return ' '.join(x)


def same(x):
    # Identity function; returns its argument unchanged.
    return x


# Getter that selects the whole input unchanged (alias of the identity).
all_getter = same
def exact_matcher_factory(multi_getter, target, history_transformer=same, joiner=space_joiner):
    """Build a predicate testing whether selected history joins to ``target``.

    Args:
        multi_getter: callable applied to an event's history.  It may return
            a single item or a tuple/list of items; a single item is wrapped
            in a 1-tuple.
        target: value compared for equality with the joined history.
        history_transformer: callable applied to every selected item before
            joining.
        joiner: callable that turns the list of transformed items into the
            value compared with ``target``.

    Returns:
        A function that takes an ``(history, word)`` event and returns True
        when the joined, transformed selection equals ``target``.
    """
    def exact_matcher(event):
        history, _word = event
        try:
            selected = multi_getter(history)
        except LookupError:
            # The history is too short for the requested positions: treat it
            # as an empty selection.  Only lookup failures are absorbed so
            # that genuine bugs (NameError, TypeError, ...) still surface.
            selected = []
        else:
            if not isinstance(selected, (tuple, list)):
                selected = (selected,)
        # Materialise a list so the joiner gets the same kind of object on
        # Python 2 and Python 3 (where ``map`` is lazy).
        transformed = [history_transformer(item) for item in selected]
        return target == joiner(transformed)
    return exact_matcher
def set_matcher_factory(multi_getter, target, history_transformer=same):
    """Build a predicate testing whether ``target`` is among selected history.

    Args:
        multi_getter: callable applied to an event's history.  It may return
            a single item or a tuple/list of items; a single item is wrapped
            in a 1-tuple.
        target: value looked up in the set of transformed items.
        history_transformer: callable applied to every selected item; its
            results must be hashable because they are collected in a set.

    Returns:
        A function that takes an ``(history, word)`` event and returns True
        when ``target`` equals one of the transformed selected items.
    """
    def set_matcher(event):
        history, _word = event
        try:
            selected = multi_getter(history)
        except LookupError:
            # The history is too short for the requested positions: nothing
            # is selected, so the match fails.  Other exceptions propagate
            # instead of being silently swallowed.
            selected = []
        else:
            if not isinstance(selected, (tuple, list)):
                selected = (selected,)
        return target in set(history_transformer(item) for item in selected)
    return set_matcher
def set_join_matcher_factory(multi_getter, target, history_transformer=same):
    """Build a predicate testing whether selected history overlaps ``target``.

    Args:
        multi_getter: callable applied to an event's history.  It may return
            a single item or a tuple/list of items; a single item is wrapped
            in a 1-tuple.
        target: iterable of hashable values to intersect with.
            NOTE(review): a bare string would be read as a collection of
            characters -- confirm what callers pass.
        history_transformer: callable applied to every selected item; its
            results must be hashable.

    Returns:
        A function that takes an ``(history, word)`` event and returns True
        when at least one transformed selected item is in ``target``.
    """
    # Built once per factory call.  The previous body referenced an
    # undefined ``target_set`` (and a misspelt ``histhry``), so every call
    # raised NameError.
    target_set = frozenset(target)

    def set_join_matcher(event):
        history, _word = event
        try:
            selected = multi_getter(history)
        except LookupError:
            # The history is too short for the requested positions: nothing
            # is selected, so there is no overlap.
            selected = []
        else:
            if not isinstance(selected, (tuple, list)):
                selected = (selected,)
        transformed = set(history_transformer(item) for item in selected)
        return not target_set.isdisjoint(transformed)
    return set_join_matcher
def history_len_matcher_factory(length, mode=operator.eq):
    """Build a predicate that compares an event's history length to ``length``.

    ``mode`` is a two-argument callable invoked as
    ``mode(len(history), length)``; the default tests for equality.
    The returned function takes an ``(history, word)`` event and returns
    whatever ``mode`` returns.
    """
    def history_len_matcher(event):
        context, _ = event
        observed = len(context)
        return mode(observed, length)
    return history_len_matcher
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment