Skip to content

Instantly share code, notes, and snippets.

@bmaland
Created May 15, 2009 07:15
Show Gist options
  • Select an option

  • Save bmaland/112080 to your computer and use it in GitHub Desktop.

Select an option

Save bmaland/112080 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Simple algorithm for clustering WordNet synsets. Requires Python 2.5 or 2.6
# and NLTK, which is available at http://www.nltk.org/.
import nltk
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic
def cluster_senses(word, treshold=0.30,
                   ic_corpus=wordnet_ic.ic('ic-treebank.dat')):
    """
    Greedily group the WordNet noun synsets of `word` into clusters.

    A synset joins the first existing cluster that contains at least one
    member whose `lin_similarity` to it is strictly greater than `treshold`;
    if no cluster qualifies, the synset starts a new cluster of its own.
    Each synset therefore ends up in exactly one cluster. The result depends
    on processing order: the last synset returned by `wn.synsets` seeds the
    first cluster and the remaining synsets are then visited in order.

    Parameters:
        word      -- the word whose noun synsets are clustered.
        treshold  -- similarity cut-off (spelling kept for compatibility with
                     existing keyword callers); default 0.30.
        ic_corpus -- the corpus object passed as second argument to
                     `Synset.lin_similarity`. NOTE: the default expression is
                     evaluated once, when this `def` statement runs, so
                     'ic-treebank.dat' is loaded at that time rather than on
                     each call.

    Returns:
        A list of clusters, each cluster being a non-empty list of synsets.
        An empty list is returned when `word` has no noun synsets.
    """
    synsets = wn.synsets(word, pos=wn.NOUN)  # nouns only
    if not synsets:
        # Nothing to cluster; without this guard the pop() below would raise
        # IndexError for words that have no noun senses.
        return []

    # Seed the first cluster with the last synset; the rest are placed below.
    clusters = [[synsets.pop()]]
    for s in synsets:
        for c in clusters:
            # any() stops at the first sufficiently similar member.
            if any(s.lin_similarity(ss, ic_corpus) > treshold for ss in c):
                c.append(s)
                # Make sure that a synset is only added to one cluster
                break
        else:
            # the synset doesn't fit in any of the existing clusters so we
            # create a new one
            clusters.append([s])
    return clusters
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment