Created
July 26, 2016 17:11
-
-
Save affo/47768253e9bcc28c9492416f7e479619 to your computer and use it in GitHub Desktop.
Naive Bayes dish classifier
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random
import numpy as np

# Build a naive Bayes model of restaurant dishes (ignoring the lab
# framing for now).  A dish is judged on a five-point scale:
classes_labels = {
    0: 'terrific',
    1: 'bad',
    2: 'not_bad',
    3: 'good',
    4: 'awesome'
}

# Each dish is a binary vector over this fixed ingredient vocabulary;
# the meals we experienced give us labelled training examples.
features = np.array(['tomato', 'salad', 'coriander', 'rice', 'potatoes',
                     'beans', 'pork', 'beef', 'bacalao', 'salmon', 'clams',
                     'sugar'])
# And this was our experience: | |
def dish_repr(dish, feature_names=None):
    """Return the names of the ingredients present in *dish*.

    Parameters
    ----------
    dish : array-like of 0/1 flags, one entry per ingredient.
    feature_names : optional array-like of ingredient names.  Defaults to
        the module-level ``features`` vocabulary, so existing callers are
        unaffected; passing it explicitly makes the function reusable with
        any vocabulary.
    """
    names = features if feature_names is None else np.asarray(feature_names)
    # np.nonzero yields the indices of the active (non-zero) ingredients.
    return names[np.nonzero(dish)]
def gen_dish():
    """Sample one random dish and a heuristic quality label for it.

    Returns
    -------
    tuple ``(dish, score)`` where ``dish`` is a 0/1 vector aligned with
    ``features`` and ``score`` is a class index in
    ``range(len(classes_labels))``.
    """
    # Ingredient combinations that make a dish better...
    likely = [
        ('tomato', 'salad'),
        ('tomato', 'salad', 'rice'),
        ('pork', 'clams', 'potatoes'),
        ('bacalao', 'coriander'),
        ('salmon', 'coriander'),
        ('beef', 'coriander'),
        ('pork', 'coriander'),
        ('pork', 'potatoes'),
        ('beef', 'potatoes'),
    ]
    # ...and combinations that spoil it.
    unlikely = [
        ('bacalao', 'sugar'),
        ('beef', 'sugar'),
        ('salmon', 'sugar'),
        ('clams', 'sugar'),
        ('pork', 'sugar'),
        ('pork', 'beef'),
        ('pork', 'bacalao'),
        ('beef', 'bacalao'),
    ]

    def get_ingredients():
        # Each ingredient is independently present/absent with p = 0.5.
        return np.random.randint(0, 2, len(features))

    def get_class(dish):
        dish_r = dish_repr(dish)

        def sublist_exists(sub, l):
            # True when every element of `sub` occurs in `l`.
            for el in sub:
                if el not in l:
                    return False
            return True

        no_likely = [sublist_exists(l, dish_r) for l in likely].count(True)
        no_unlikely = [sublist_exists(u, dish_r) for u in unlikely].count(True)
        no_cl = len(classes_labels)
        lik_contrib = len(likely) / float(no_cl)
        unlik_contrib = len(unlikely) / float(no_cl)
        # Floor division: the base score must stay an integer class index
        # (plain `/` would produce a float under Python 3).
        base_score = no_cl // 2
        score = base_score + int(lik_contrib * no_likely - unlik_contrib * no_unlikely)
        # Clamp to the valid class range.  NOTE(fix): the upper bound is
        # no_cl - 1; the original clamped to no_cl, which could emit the
        # label 5 even though classes_labels only has keys 0..4.
        if score < 0:
            score = 0
        if score > no_cl - 1:
            score = no_cl - 1
        return score

    dish = get_ingredients()
    return dish, get_class(dish)
# Cook up a labelled training set of random dishes.
NO_DISHES = 1500

# NOTE(fix): use the builtin `int` — the `np.int` alias was deprecated in
# NumPy 1.20 and removed in 1.24; `range` iterates like Py2's `xrange`.
x = np.zeros((NO_DISHES, len(features)), dtype=int)  # dish/ingredient matrix
y = np.zeros((NO_DISHES,), dtype=int)                # class label per dish
for i in range(NO_DISHES):
    x[i, :], y[i] = gen_dish()
# Now we can build the classifier.
# Priors: P(class) estimated as the fraction of training dishes in each
# class.  NOTE(fix): the original divided each per-class count by the
# number of classes rather than the number of dishes, so the "priors"
# summed to NO_DISHES/no_classes instead of 1 (a constant factor, hence
# argmax in classify() was unaffected, but the values were not priors).
no_classes = len(classes_labels)
priors = np.zeros((no_classes,), dtype=np.float64)
for i in range(no_classes):
    no_dishes = np.count_nonzero(y == i)
    priors[i] = no_dishes / float(len(y))
# Conditionals: P(feature | class), estimated from ingredient counts
# inside each class.  NOTE(fix): Laplace (add-one) smoothing keeps unseen
# class/ingredient pairs from getting probability 0, which would zero out
# the whole posterior product in classify(); it also makes an empty class
# well-defined (uniform conditionals) without a special case.
conds = np.zeros((len(features), no_classes), dtype=np.float64)
# for each class
for i in range(no_classes):
    # extract the dishes matching that class
    indices = np.where(y == i)[0]
    dishes_in_class = x[indices, :]
    # count, per feature, how many dishes in the class activate it
    no_active_per_f = dishes_in_class.sum(0)
    total_active = no_active_per_f.sum()
    conds[:, i] = (no_active_per_f + 1.0) / float(total_active + len(features))
# Now we can write the classifier:
def classify(dish):
    """Classify *dish* with naive Bayes: argmax_c P(c) * prod_f P(f|c).

    *dish* is a 0/1 vector aligned with ``features``; the returned value
    is the winning class label string from ``classes_labels``.
    """
    # The active-ingredient index set is loop-invariant: compute it once.
    # NOTE(fix): take the plain index array ([0]) instead of feeding the
    # np.nonzero tuple into a compound index — nesting a tuple inside an
    # index expression is unreliable on modern NumPy.
    active = np.nonzero(dish)[0]
    res = priors.copy()
    for i in range(no_classes):
        # dish[active] is all ones, so this is the product of the
        # conditionals of the active features, kept for parity with the
        # original formulation.
        probs = dish[active] * conds[active, i]
        res[i] *= probs.prod()
    print(res)  # debug: unnormalised posterior per class
    best = np.argmax(res)
    return classes_labels[best]
if __name__ == '__main__':
    # Interactive demo: press Enter to sample and classify a random dish.
    # NOTE(fix): `raw_input` does not exist on Python 3; bind the right
    # pause function once instead of switching to `input`, which would
    # eval() the line under Python 2.
    try:
        _pause = raw_input  # Python 2
    except NameError:
        _pause = input      # Python 3
    while True:
        _pause()
        dish, _ = gen_dish()
        # Single-argument print(...) emits identical output on Py2 and Py3.
        print(dish_repr(dish))
        print(classify(dish))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment