andreaschandra · May 14, 2018 07:25
diff --git a/gistfile1.txt b/gistfile1.txt
 # Restrict the run to a small, hand-picked set of newsgroups.
 categories = [
     'alt.atheism', 'talk.religion.misc',
     'comp.graphics', 'sci.space',
 ]
 # To analyse every category instead, uncomment the next line.
 # categories = None

 print("Loading 20 newsgroups dataset for categories:")
 print(categories)

 # subset='all' with a fixed random_state so the shuffled order is repeatable.
 dataset = fetch_20newsgroups(
     subset='all',
     categories=categories,
     shuffle=True,
     random_state=42,
 )

 print("{0:d} documents".format(len(dataset.data)))
 print("{0:d} categories".format(len(dataset.target_names)))
 print()

 # true_k is the number of distinct label values present in the loaded data.
 labels = dataset.target
 true_k = len(np.unique(labels))
	# Restrict the run to a small, hand-picked set of newsgroups.
	categories = [
		'alt.atheism', 'talk.religion.misc',
		'comp.graphics', 'sci.space',
	]
	# To analyse every category instead, uncomment the next line.
	# categories = None

	print("Loading 20 newsgroups dataset for categories:")
	print(categories)

	# subset='all' with a fixed random_state so the shuffled order is repeatable.
	dataset = fetch_20newsgroups(
		subset='all',
		categories=categories,
		shuffle=True,
		random_state=42,
	)

	print("{0:d} documents".format(len(dataset.data)))
	print("{0:d} categories".format(len(dataset.target_names)))
	print()

	# true_k is the number of distinct label values present in the loaded data.
	labels = dataset.target
	true_k = len(np.unique(labels))