aronwc · December 31, 2015 20:58 · aronwc · Dec 19, 2013
diff --git a/l1.py b/l1.py
 import numpy as np

 from sklearn import linear_model
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.feature_extraction.text import CountVectorizer


 def print_features(coef, names):
     """Print the non-zero features and their weights, largest weight first.

     One ``name/weight`` entry is written per line, with the weight
     formatted to two decimal places. Entries whose weight is exactly zero
     are skipped.

     Args:
         coef: array of feature weights, indexed by feature position
             (presumably one-dimensional; the script passes ``clf.coef_[0]``).
         names: sequence giving the name of the feature at each position.
     """
     # argsort is ascending, so reverse it to list the largest weights first.
     order = np.argsort(coef)[::-1]
     lines = ['%s/%.2f' % (names[j], coef[j]) for j in order if coef[j] != 0]
     # A parenthesised single argument prints the same text under both
     # Python 2 (print statement) and Python 3 (print function).
     print("\n".join(lines))


 if __name__ == '__main__':
     # Seeded generator, passed below as the shuffle's random_state.
     rand = np.random.RandomState(8675309)
     cats = ['rec.sport.baseball', 'sci.crypt']
     data = fetch_20newsgroups(subset='train',
                               categories=cats,
                               shuffle=True,
                               random_state=rand)
     # Smaller C means fewer features selected.
     # The solver is named explicitly: newer scikit-learn releases default
     # to lbfgs, which rejects the L1 penalty.
     clf = linear_model.LogisticRegression(penalty='l1', C=.1,
                                           solver='liblinear')
     vec = CountVectorizer()
     X = vec.fit_transform(data.data)
     clf.fit(X, data.target)
     # get_feature_names() was replaced by get_feature_names_out() in newer
     # scikit-learn releases; use whichever this installation provides.
     if hasattr(vec, 'get_feature_names_out'):
         names = vec.get_feature_names_out()
     else:
         names = vec.get_feature_names()
     print_features(clf.coef_[0], names)
	import numpy as np

	from sklearn import linear_model
	from sklearn.datasets import fetch_20newsgroups
	from sklearn.feature_extraction.text import CountVectorizer


	def print_features(coef, names):
	    """Print the non-zero features and their weights, largest weight first.

	    One ``name/weight`` entry is written per line, with the weight
	    formatted to two decimal places. Entries whose weight is exactly zero
	    are skipped.

	    Args:
	        coef: array of feature weights, indexed by feature position
	            (presumably one-dimensional; the script passes ``clf.coef_[0]``).
	        names: sequence giving the name of the feature at each position.
	    """
	    # argsort is ascending, so reverse it to list the largest weights first.
	    order = np.argsort(coef)[::-1]
	    lines = ['%s/%.2f' % (names[j], coef[j]) for j in order if coef[j] != 0]
	    # A parenthesised single argument prints the same text under both
	    # Python 2 (print statement) and Python 3 (print function).
	    print("\n".join(lines))


	if __name__ == '__main__':
	    # Seeded generator, passed below as the shuffle's random_state.
	    rand = np.random.RandomState(8675309)
	    cats = ['rec.sport.baseball', 'sci.crypt']
	    data = fetch_20newsgroups(subset='train',
	                              categories=cats,
	                              shuffle=True,
	                              random_state=rand)
	    # Smaller C means fewer features selected.
	    # The solver is named explicitly: newer scikit-learn releases default
	    # to lbfgs, which rejects the L1 penalty.
	    clf = linear_model.LogisticRegression(penalty='l1', C=.1,
	                                          solver='liblinear')
	    vec = CountVectorizer()
	    X = vec.fit_transform(data.data)
	    clf.fit(X, data.target)
	    # get_feature_names() was replaced by get_feature_names_out() in newer
	    # scikit-learn releases; use whichever this installation provides.
	    if hasattr(vec, 'get_feature_names_out'):
	        names = vec.get_feature_names_out()
	    else:
	        names = vec.get_feature_names()
	    print_features(clf.coef_[0], names)
No results found