Created
January 12, 2017 02:25
-
-
Save davidlj95/b4f26d46c14ca7bd413d83d4dcc4ab26 to your computer and use it in GitHub Desktop.
A simple script that calculates the mean, standard deviation and variance of a given set of values and, optionally, the Gaussian probability density of a given target value. The values can be divided into two boolean classes (yes/no) so that the statistics are calculated for each subset. It was designed as an aid for the calculations in Naïve Bayes algorithm exercises.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Libraries
import math
import sys

# Constants
# Minimum number of command-line arguments required besides the script name.
MIN_ARGS = 1

# Parameters
# values      -> every value supplied by the user or by the chosen exercise
# values_yes  -> subset of values belonging to class=yes (None when unused)
# values_no   -> subset of values belonging to class=no (None when unused)
# target      -> value whose gaussian probability is requested (None when unused)
values = values_yes = values_no = target = None
# verbose     -> when True, the values of each set are printed with the stats
verbose = False
# Help message
def help(exit_code=1):
    """Print the usage text to standard output and terminate the script.

    The script name shown in the usage line is read from the module-level
    ``args`` list, so ``args`` must be assigned before this is called.

    Parameters:
        exit_code: status handed to ``sys.exit`` (defaults to 1).
    """
    synopsis = "{<values> [<target>|{<indexes_yes>}]|-e <exercise> [-t <target>]} [-v] [-h]\n"
    paragraphs = (
        "Given a list of comma-separated values, and a target, returns "
        "the gaussian probability for the target value\n",
        "If given indexes (comma-separated) for the yes class, it will "
        "calculate gaussian probability for values whose class=no and "
        "class=yes (indexes start by 1)\n",
        "If specified -e, you can select an exercise, where values are "
        "already defined\n",
        "Specify -v to print all values in all classes, -h to see this "
        "help message",
    )
    print("Usage: python", args[0], synopsis)
    for paragraph in paragraphs:
        print(paragraph)
    sys.exit(exit_code)
# Argument parsing
#
# Fills the module-level parameters (values, values_yes, values_no, target,
# verbose) from the command line. Two modes exist:
#   * exercise mode: -e <exercise> [[-t] <target>]
#   * manual mode:   <values> [<target> [<indexes_yes>]]
# Every failure prints an "ERROR: ..." line and exits with a dedicated code.
args = sys.argv
if len(args) - 1 < MIN_ARGS:
    print("ERROR: Not enough arguments")
    help()

# Check options
if "-h" in args:
    help(0)
if "-v" in args:
    verbose = True

# Positional arguments are everything after the script name except the -v
# flag, so the flag may appear anywhere without being mistaken for a value.
# Only the exact token "-v" is dropped; negative numbers such as "-3" stay.
positional = [arg for arg in args[1:] if arg != "-v"]
if len(positional) < MIN_ARGS:
    print("ERROR: Not enough arguments")
    help()

if positional[0] == "-e":
    # Exercise mode
    # Definitions
    exercises = {
        "theory": {
            "values": [83, 70, 68, 64, 69, 75, 75, 72, 81, 85, 80, 65, 72, 71]
        },
        "problems": {
            "values": [n * 1000 for n in [125, 100, 70, 120, 60, 220, 75, 95, 85, 90]]
        },
    }
    # The leading elements of each list are the class=yes samples, the
    # remaining ones are the class=no samples.
    exercises["problems"]["values_yes"] = exercises["problems"]["values"][:7]
    exercises["problems"]["values_no"] = exercises["problems"]["values"][7:]
    exercises["theory"]["values_yes"] = exercises["theory"]["values"][:9]
    exercises["theory"]["values_no"] = exercises["theory"]["values"][9:]

    # Help
    def show_exercises():
        """Print the names of the available exercises and exit with code 3."""
        print("Available exercises are:")
        print(list(exercises.keys()))
        sys.exit(3)

    # Exercise selection
    if len(positional) < 2:
        print("ERROR: Not enough arguments, please specify an exercise")
        show_exercises()
    if positional[1] not in exercises:
        print("ERROR: Exercise", positional[1], "not defined")
        show_exercises()

    # Assignation
    exercise = exercises[positional[1]]
    values = exercise["values"]
    values_yes = exercise["values_yes"]
    values_no = exercise["values_no"]

    # Optional target assignation. The usage text documents "-t <target>",
    # while earlier versions only accepted a bare "<target>"; accept both.
    target_args = positional[2:]
    if target_args and target_args[0] == "-t":
        target_args = target_args[1:]
        if not target_args:
            print("ERROR: Unable to parse target to float", "missing value after -t")
            sys.exit(6)
    if target_args:
        try:
            target = float(target_args[0])
        except ValueError as e:
            print("ERROR: Unable to parse target to float", str(e))
            sys.exit(6)
else:
    # Manual mode
    # Values parsing
    try:
        values = [float(n) for n in positional[0].split(",")]
    except ValueError as e:
        print("ERROR: Unable to parse values to floats:", str(e))
        sys.exit(2)
    # The sample variance divides by len(values) - 1, so two values are the
    # minimum that can be processed.
    if len(values) < 2:
        print("ERROR: Specify more elements in the values list, please")
        sys.exit(7)

    # Target parsing
    if len(positional) > 1:
        try:
            target = float(positional[1])
        except ValueError as e:
            print("ERROR: Unable to parse target to float:", str(e))
            sys.exit(6)

    # Yes class parsing
    if len(positional) > 2:
        try:
            indexes_yes = [int(n) for n in positional[2].split(",")]
        except ValueError as e:
            print("ERROR: Unable to parse indexes to ints:", str(e))
            sys.exit(4)

        # Indexes are 1-based. Reject 0 and negatives explicitly, otherwise
        # values[i-1] would silently wrap around to the end of the list.
        out_of_range = [i for i in indexes_yes if not 1 <= i <= len(values)]
        if out_of_range:
            print("ERROR: Unable to pick indexes values:",
                  "indexes out of range " + str(out_of_range))
            sys.exit(5)

        # Split by position rather than by value: a set difference would
        # drop duplicated values and any class=no value that happens to
        # equal a class=yes value. Repeated indexes are counted once.
        yes_positions = {i - 1 for i in indexes_yes}
        values_yes = [v for pos, v in enumerate(values) if pos in yes_positions]
        values_no = [v for pos, v in enumerate(values) if pos not in yes_positions]

        if len(values_yes) < 2 or len(values_no) < 2:
            print("ERROR: Some class has less than 2 values:")
            print("Values(class=yes):", values_yes)
            print("Values(class=no):", values_no)
            sys.exit(7)
# Statistical functions
def mean(v):
    """Return the arithmetic mean of the numbers in the sequence v.

    An empty sequence raises ZeroDivisionError, because the total is
    divided by the number of elements.
    """
    total = sum(v)
    count = len(v)
    return total / count
def variance(v):
    """Return the sample variance of the numbers in the sequence v.

    The squared deviations from the mean are summed and divided by
    len(v) - 1, so a single-element sequence raises ZeroDivisionError.
    """
    centre = mean(v)
    squared_deviations = [(centre - item) ** 2 for item in v]
    degrees_of_freedom = len(v) - 1
    return sum(squared_deviations) / degrees_of_freedom
def sdev(v):
    """Return the sample standard deviation of v: the square root of variance(v)."""
    spread = variance(v)
    return math.sqrt(spread)
def gaussian(t, v):
    """Return the normal (gaussian) probability density at t for the sample v.

    The distribution is parameterised with the sample mean u and the sample
    variance s2 of v (s is the square root of s2):

        f(t) = exp(-(t - u)**2 / (2 * s2)) / (sqrt(2 * pi) * s)

    Parameters:
        t: the target value at which the density is evaluated.
        v: sequence of numbers the distribution is estimated from.

    Raises:
        ZeroDivisionError: if v has fewer than two elements or all its
            elements are equal (zero variance).
    """
    # Compute the variance once and derive the standard deviation from it
    # instead of going through the whole sample a second time.
    s2 = variance(v)
    s = math.sqrt(s2)
    u = mean(v)
    divisor = math.sqrt(2 * math.pi) * s
    # The factor 2 in the denominator is part of the normal density formula;
    # leaving it out makes the curve too narrow and under-estimates every
    # value away from the mean.
    exponent = ((t - u) ** 2) / (2 * s2)
    return math.exp(-exponent) / divisor
# Information
def print_info(values, target=None):
    """Print the statistics of values, followed by an empty line.

    The values themselves are printed first when the module-level ``verbose``
    flag is set. Mean, standard deviation and variance follow, and the
    gaussian probability for target is added when a target is given.

    Parameters:
        values: sequence of numbers to summarise.
        target: optional value passed to gaussian(); skipped when None.
    """
    # NOTE(review): indentation was lost in the extracted source; it is
    # assumed that only the "Values" line depends on verbose and that the
    # trailing empty line is always printed -- confirm against the original.
    if verbose:
        print("Values: ", values)
    summary = (
        "Mean:", mean(values),
        "| sdev:", sdev(values),
        "| Variance:", variance(values),
    )
    print(*summary)
    if target is not None:
        probability = gaussian(target, values)
        print("Gaussian Probability for", target, ":", probability)
    print("")
# Results
# The statistics for the complete set are always printed; one extra section
# per class follows when the values were divided into classes.
_rule = "--------------------------------------------------------------"
print("ALL VALUES")
print(_rule)
print_info(values, target)
for _title, _subset in (
    ("VALUES (class=yes)", values_yes),
    ("VALUES (class=no)", values_no),
):
    if _subset is None:
        continue
    print(_title)
    print(_rule)
    print_info(_subset, target)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment