Skip to content

Instantly share code, notes, and snippets.

@pdu
Created January 2, 2013 07:40
Show Gist options
  • Select an option

  • Save pdu/4432905 to your computer and use it in GitHub Desktop.

Select an option

Save pdu/4432905 to your computer and use it in GitHub Desktop.
Given n sentences, remove from each sentence the phrases that are common to all of them; a phrase is defined as 3 or more consecutive words. (Glassdoor: imo.im)
#!/usr/bin/python
def _trigrams(words):
    """Return every window of 3 consecutive words in *words* as a tuple."""
    # range() is empty when there are fewer than 3 words, so short
    # sentences simply yield no windows.
    return [tuple(words[i:i + 3]) for i in range(len(words) - 2)]


def remove(buf):
    """Remove from each sentence the phrases shared by every sentence.

    A phrase is 3 or more consecutive whitespace-separated words. Longer
    shared phrases are handled through their overlapping 3-word windows:
    every word covered by a window that occurs in all sentences is dropped.

    Args:
        buf: iterable of sentence strings.

    Returns:
        A list with one string per input sentence, in the same order, made
        of the surviving words joined by single spaces (so runs of
        whitespace in the input are normalized).
    """
    wordslist = [line.split() for line in buf]
    total = len(wordslist)

    # Count how many *sentences* contain each window. Deduplicating with a
    # set per sentence matters: a phrase repeated inside one sentence must
    # not be counted twice, otherwise the "present in all sentences" test
    # below gives wrong answers.
    sentence_counts = {}
    for words in wordslist:
        for phrase in set(_trigrams(words)):
            sentence_counts[phrase] = sentence_counts.get(phrase, 0) + 1

    ret = []
    for words in wordslist:
        keep = [True] * len(words)
        for i, phrase in enumerate(_trigrams(words)):
            if sentence_counts[phrase] == total:
                # Window i covers words i, i+1 and i+2.
                keep[i] = keep[i + 1] = keep[i + 2] = False
        ret.append(' '.join(w for w, kept in zip(words, keep) if kept))
    return ret
def main():
    """Run `remove` on a small sample of sentences and print the result."""
    buf = [
        'i love this game',
        'i dont love this game',
        'he love this game',
        'she love this game too',
    ]
    ret = remove(buf)
    # Parenthesized single-argument form prints the same list under both
    # Python 2 and Python 3.
    print(ret)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment