Created
November 4, 2014 18:40
-
-
Save ShigekiKarita/d192772e1373d60eada3 to your computer and use it in GitHub Desktop.
descision_tree.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import log2 | |
from csv import reader | |
from collections import Counter | |
from sys import argv | |
def transpose(xx): | |
return map(list, zip(*xx)) | |
def self_entropy(x): | |
return x * log2(x) | |
def entropy(xs): | |
ps = [xs.count(k) / len(xs) for k in set(xs)] | |
return - sum(map(self_entropy, ps)) | |
def qualified_entropy(labels, xs): | |
def dict_ps(xs): | |
return {k: xs.count(k) for k in set(xs)} | |
qualifiers = dict_ps(xs) | |
pairs = dict_ps(list(zip(xs, labels))) # (attr, class): P | |
ret = 0.0 | |
for k, v in pairs.items(): | |
q = qualifiers[k[0]] | |
ret += q / len(xs) * self_entropy(v / q) | |
return ret | |
def gain(values, x): | |
labels = values["class"] | |
xs = values[x] | |
return entropy(labels) + qualified_entropy(labels, xs) | |
def heads(xs): | |
return [x for x in xs if x[1] == xs[0][1]] | |
def deeper(data): | |
values = {xs[0]: xs[1:] for xs in transpose(data)} | |
attributes = [x for x in data[0] if x != "class"] | |
gains = {x: gain(values, x) for x in attributes} | |
gains = sorted(gains.items(), key=lambda x: -x[1]) | |
print("gain:", gains) | |
max_attr = heads(gains)[0][0] | |
max_values = heads(Counter(values[max_attr]).most_common()) | |
print("next:", max_attr, "/", max_values[0][0]) | |
next_attrs = [x for x in data[0] if x != max_attr] | |
next_values = [xs[1:] for xs in data if max_values[0][0] in xs] | |
return [next_attrs] + next_values | |
if __name__ == '__main__': | |
if len(argv) == 1: | |
argv.append('table.csv') | |
data = list(reader(open(argv[1], 'r'))) | |
while len(data[0]) != 1: | |
data = deeper(data) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Outlook | Temperature | Humidity | Windy | class | |
---|---|---|---|---|---|
sunny | hot | high | FALSE | N | |
sunny | hot | high | TRUE | N | |
overcast | hot | high | FALSE | P | |
rain | mild | high | FALSE | P | |
rain | cool | normal | FALSE | P | |
rain | cool | normal | TRUE | N | |
overcast | cool | normal | TRUE | P | |
sunny | mild | high | FALSE | N | |
sunny | cool | normal | FALSE | P | |
rain | mild | normal | FALSE | P | |
sunny | mild | normal | TRUE | P | |
overcast | mild | high | TRUE | P | |
overcast | hot | normal | FALSE | P | |
rain | mild | high | TRUE | N |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
age of the patient | spectacle prescription | astigmatic | tear production rate | class | |
---|---|---|---|---|---|
young | myope | no | normal | soft | |
young | myope | yes | abnormal | hard | |
young | hypermetrope | yes | normal | soft | |
pre-presbyopic | hypermetrope | yes | normal | hard | |
pre-presbyopic | myope | no | abnormal | soft | |
pre-presbyopic | myope | no | normal | hard | |
pre-presbyopic | hypermetrope | no | normal | soft | |
presbyopic | myope | yes | abnormal | hard | |
presbyopic | hypermetrope | yes | normal | soft |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment