bmaland · May 15, 2009 07:15
diff --git a/sense-cluster.py b/sense-cluster.py
 #!/usr/bin/env python

 # Simple algorithm for clustering WordNet synsets. Requires Python 2.5 or 2.6,
 # in addition to the NLTK toolkit which is available at http://www.nltk.org/.

 import nltk
 from nltk.corpus import wordnet as wn
 from nltk.corpus import wordnet_ic

 def cluster_senses(word, treshold = 0.30,
                       ic_corpus = wordnet_ic.ic('ic-treebank.dat')):
    """Greedily group the noun synsets of `word` into clusters.

    Each synset is compared, via Lin similarity, against the members of
    the clusters built so far and is appended to the first cluster that
    contains a member scoring strictly above `treshold`.  A synset that
    matches no existing cluster starts a new one.  Every synset ends up
    in exactly one cluster.

    Arguments:
    word      -- the word whose noun synsets are looked up with wn.synsets
    treshold  -- similarity a pair must exceed (strictly) to share a cluster
    ic_corpus -- information-content data handed to lin_similarity; note
                 that the default is loaded once, when this function is
                 defined, not on every call

    Returns a list of clusters, each cluster being a list of synsets.  An
    empty list is returned when `word` has no noun synsets.
    """
    synsets = wn.synsets(word, pos=wn.NOUN) # nouns only
    if not synsets:
        # Nothing to cluster; popping from an empty list would raise
        # IndexError.
        return []

    # The last synset returned seeds the first cluster.
    clusters = [[synsets.pop()]]
    for s in synsets:
        for c in clusters:
            # any() stops at the first sufficiently similar member, and the
            # break below makes sure a synset is only added to one cluster.
            if any(s.lin_similarity(ss, ic_corpus) > treshold for ss in c):
                c.append(s)
                break
        else:
            # the synset doesn't fit in any of the existing clusters so we
            # create a new one
            clusters.append([s])

    return clusters
	#!/usr/bin/env python

	# Simple algorithm for clustering WordNet synsets. Requires Python 2.5 or 2.6,
	# in addition to the NLTK toolkit which is available at http://www.nltk.org/.

	import nltk
	from nltk.corpus import wordnet as wn
	from nltk.corpus import wordnet_ic

	def cluster_senses(word, treshold = 0.30,
			ic_corpus = wordnet_ic.ic('ic-treebank.dat')):
		"""Greedily group the noun synsets of `word` into clusters.

		Each synset is compared, via Lin similarity, against the members of
		the clusters built so far and is appended to the first cluster that
		contains a member scoring strictly above `treshold`.  A synset that
		matches no existing cluster starts a new one.  Every synset ends up
		in exactly one cluster.

		Arguments:
		word      -- the word whose noun synsets are looked up with wn.synsets
		treshold  -- similarity a pair must exceed (strictly) to share a cluster
		ic_corpus -- information-content data handed to lin_similarity; note
		             that the default is loaded once, when this function is
		             defined, not on every call

		Returns a list of clusters, each cluster being a list of synsets.  An
		empty list is returned when `word` has no noun synsets.
		"""
		synsets = wn.synsets(word, pos=wn.NOUN) # nouns only
		if not synsets:
			# Nothing to cluster; popping from an empty list would raise
			# IndexError.
			return []

		# The last synset returned seeds the first cluster.
		clusters = [[synsets.pop()]]
		for s in synsets:
			for c in clusters:
				# any() stops at the first sufficiently similar member, and
				# the break below makes sure a synset is only added to one
				# cluster.
				if any(s.lin_similarity(ss, ic_corpus) > treshold for ss in c):
					c.append(s)
					break
			else:
				# the synset doesn't fit in any of the existing clusters so
				# we create a new one
				clusters.append([s])

		return clusters
No results found