rjurney · December 16, 2015 14:20
diff --git a/convert_to_scikit.py b/convert_to_scikit.py
 import sys, os
 import numpy as np
 from collections import defaultdict
 from operator import itemgetter
 from sklearn.naive_bayes import GaussianNB

 # live    1       classic pop and rock
 # onli    2       classic pop and rock
 # tri     1       classic pop and rock
 # keep    3       classic pop and rock
 # dream   2       classic pop and rock

 # Build a single-feature training set from tab-separated
 # `token<TAB>count<TAB>genre` records and fit a Gaussian naive Bayes model.
 genre_tokens = defaultdict(lambda : defaultdict(dict))
 X = []
 y = []
 all_keys = {}
 # `with` guarantees the input file is closed, even if a record fails to parse.
 with open('/tmp/genre_lyrics.txt/part-r-00000') as f:
   for line in f:
     # Strip only the line terminator: slicing off the last character would
     # truncate the genre when the final line has no trailing newline.
     token, count, genre = line.rstrip('\n').split('\t')
     all_keys[token] = 1  # dict used as a set of every token seen
     genre_tokens[genre][token] = (float(count), genre)

 # Emit one sample per (token, genre) pair: the only feature is the token's
 # count in that genre (0.0 when the token never occurs there) and the label
 # is the genre itself.
 for key in all_keys:
   for genre in sorted(genre_tokens):
     if key in genre_tokens[genre]:
       X.append([genre_tokens[genre][key][0]])
     else:
       X.append([0.0]) # Laplace here
     y.append(genre)

 # Fit on the full set and predict on the same samples (no held-out split).
 gnb = GaussianNB()
 y_pred = gnb.fit(X, y).predict(X)
	import sys, os
	import numpy as np
	from collections import defaultdict
	from operator import itemgetter
	from sklearn.naive_bayes import GaussianNB

	# live 1 classic pop and rock
	# onli 2 classic pop and rock
	# tri 1 classic pop and rock
	# keep 3 classic pop and rock
	# dream 2 classic pop and rock

	# Build a single-feature training set from tab-separated
	# `token<TAB>count<TAB>genre` records and fit a Gaussian naive Bayes model.
	genre_tokens = defaultdict(lambda : defaultdict(dict))
	X = []
	y = []
	all_keys = {}
	# `with` guarantees the input file is closed, even if a record fails to parse.
	with open('/tmp/genre_lyrics.txt/part-r-00000') as f:
	  for line in f:
	    # Strip only the line terminator: slicing off the last character would
	    # truncate the genre when the final line has no trailing newline.
	    token, count, genre = line.rstrip('\n').split('\t')
	    all_keys[token] = 1  # dict used as a set of every token seen
	    genre_tokens[genre][token] = (float(count), genre)

	# Emit one sample per (token, genre) pair: the only feature is the token's
	# count in that genre (0.0 when the token never occurs there) and the label
	# is the genre itself.
	for key in all_keys:
	  for genre in sorted(genre_tokens):
	    if key in genre_tokens[genre]:
	      X.append([genre_tokens[genre][key][0]])
	    else:
	      X.append([0.0]) # Laplace here
	    y.append(genre)

	# Fit on the full set and predict on the same samples (no held-out split).
	gnb = GaussianNB()
	y_pred = gnb.fit(X, y).predict(X)
No results found