Skip to content

Instantly share code, notes, and snippets.

@davidlj95
Created January 12, 2017 02:25
Show Gist options
  • Save davidlj95/b4f26d46c14ca7bd413d83d4dcc4ab26 to your computer and use it in GitHub Desktop.
Save davidlj95/b4f26d46c14ca7bd413d83d4dcc4ab26 to your computer and use it in GitHub Desktop.
Simple script that calculates the mean, standard deviation and variance of a given set of values and, optionally, the Gaussian probability density of a given target value. The values can be divided into two boolean classes (yes/no) so that the statistics are calculated for each subset. It was designed as an aid for the calculations in Naïve Bayes algorithm exercises.
# Libraries
import math
import sys
# Constants
# Minimum number of command-line arguments required after the script name.
MIN_ARGS = 1
# Parameters
# All data sets and the target start out unset; the argument-parsing code
# below fills them in.
values = values_yes = values_no = target = None
# Verbose output is off unless requested on the command line.
verbose = False
# Help message
def help(exit_code=1):
    """Print the usage message and terminate the script.

    The script name shown in the usage line is taken from the module-level
    ``args`` list.

    Args:
        exit_code: Status passed to ``sys.exit`` (defaults to 1).
    """
    synopsis = "{<values> [<target>|{<indexes_yes>}]|-e <exercise> [-t <target>]} [-v] [-h]\n"
    paragraphs = (
        "Given a list of comma-separated values, and a target, returns "
        "the gaussian probability for the target value\n",
        "If given indexes (comma-separated) for the yes class, it will "
        "calculate gaussian probability for values whose class=no and "
        "class=yes (indexes start by 1)\n",
        "If specified -e, you can select an exercise, where values are "
        "already defined\n",
        "Specify -v to print all values in all classes, -h to see this "
        "help message",
    )
    print("Usage: python", args[0], synopsis)
    for paragraph in paragraphs:
        print(paragraph)
    sys.exit(exit_code)
# Argument parsing
args = sys.argv
# The -h / -v flags may appear anywhere on the command line, so they are
# separated from the positional arguments before positions are interpreted.
# Otherwise "script.py 1,2,3 -v" would try to parse "-v" as the target.
positional = [a for a in args[1:] if a not in ("-h", "-v")]
# Check options
if "-h" in args:
    help(0)
if "-v" in args:
    verbose = True
if len(positional) < MIN_ARGS:
    print("ERROR: Not enough arguments")
    help()
if positional[0] == "-e":
    # Exercise mode
    # Definitions
    exercises = {
        "theory": {
            "values": [83, 70, 68, 64, 69, 75, 75, 72, 81, 85, 80, 65, 72, 71]
        },
        "problems": {
            "values": [n*1000 for n in [125, 100, 70, 120, 60, 220, 75, 95, 85, 90]]
        }
    }
    # The first elements of each exercise belong to class=yes, the rest to
    # class=no.
    exercises["problems"]["values_yes"] = exercises["problems"]["values"][:7]
    exercises["problems"]["values_no"] = exercises["problems"]["values"][7:]
    exercises["theory"]["values_yes"] = exercises["theory"]["values"][:9]
    exercises["theory"]["values_no"] = exercises["theory"]["values"][9:]

    # Help
    def show_exercises():
        """Print the names of the predefined exercises and exit with code 3."""
        print("Available exercises are:")
        print(list(exercises.keys()))
        sys.exit(3)

    # Exercise selection
    if len(positional) < 2:
        print("ERROR: Not enough arguments, please specify an exercise")
        show_exercises()
    if positional[1] not in exercises:
        print("ERROR: Exercise", positional[1], "not defined")
        show_exercises()
    # Assignation
    exercise = exercises[positional[1]]
    values = exercise["values"]
    values_yes = exercise["values_yes"]
    values_no = exercise["values_no"]
    # Optional target assignation: both "-t <target>" (as documented in the
    # usage message) and a bare "<target>" are accepted.
    target_args = positional[2:]
    if target_args and target_args[0] == "-t":
        target_args = target_args[1:]
        if not target_args:
            print("ERROR: Unable to parse target to float",
                  "missing value after -t")
            sys.exit(6)
    if target_args:
        try:
            target = float(target_args[0])
        except ValueError as e:
            print("ERROR: Unable to parse target to float", str(e))
            sys.exit(6)
else:
    # Manual mode
    # Values parsing
    try:
        values = [float(n) for n in positional[0].split(',')]
    except ValueError as e:
        print("ERROR: Unable to parse values to floats:", str(e))
        sys.exit(2)
    # The sample variance needs at least two values.
    if len(values) < 2:
        print("ERROR: Specify more elements in the values list, please")
        sys.exit(7)
    # Target parsing
    if len(positional) > 1:
        try:
            target = float(positional[1])
        except ValueError as e:
            print("ERROR: Unable to parse target to float:", str(e))
            sys.exit(6)
    # Yes class parsing
    if len(positional) > 2:
        try:
            indexes_yes = [int(n) for n in positional[2].split(",")]
        except ValueError as e:
            print("ERROR: Unable to parse indexes to ints:", str(e))
            sys.exit(4)
        # Indexes are 1-based; reject 0 and negatives explicitly, since
        # Python would otherwise silently wrap them to the end of the list.
        out_of_range = [i for i in indexes_yes if not 1 <= i <= len(values)]
        if out_of_range:
            print("ERROR: Unable to pick indexes values:",
                  "indexes out of range 1..%d: %s" % (len(values), out_of_range))
            sys.exit(5)
        values_yes = [values[i-1] for i in indexes_yes]
        # Split by position rather than by value: a set difference would
        # drop repeated values and lose the original ordering.
        yes_positions = {i-1 for i in indexes_yes}
        values_no = [v for pos, v in enumerate(values)
                     if pos not in yes_positions]
        if len(values_yes) < 2 or len(values_no) < 2:
            print("ERROR: Some class has less than 2 values:")
            print("Values(class=yes):", values_yes)
            print("Values(class=no):", values_no)
            sys.exit(7)
# Statistical functions
def mean(v):
    """Return the arithmetic mean of the values in ``v``.

    Raises:
        ZeroDivisionError: if ``v`` is empty.
    """
    return sum(v) / len(v)


def variance(v):
    """Return the sample variance of ``v`` (divisor ``len(v) - 1``).

    Raises:
        ZeroDivisionError: if ``v`` has fewer than two values.
    """
    u = mean(v)
    return sum((u - e) ** 2 for e in v) / (len(v) - 1)


def sdev(v):
    """Return the sample standard deviation of ``v``."""
    return math.sqrt(variance(v))


def gaussian(t, v):
    """Return the Gaussian (normal) probability density of ``t``.

    The distribution parameters are estimated from ``v``: its mean and its
    sample variance.

    Args:
        t: Value at which the density is evaluated.
        v: Sample values used to estimate the mean and variance.

    Raises:
        ZeroDivisionError: if ``v`` has fewer than two values or all values
            are equal (zero variance).
    """
    u = mean(v)
    s2 = variance(v)
    s = math.sqrt(s2)
    # Normal pdf: exp(-(t - u)^2 / (2 * s^2)) / (s * sqrt(2 * pi)).
    # The factor 2 in the exponent's denominator is required.
    exponent = ((t - u) ** 2) / (2 * s2)
    return math.exp(-exponent) / (math.sqrt(2 * math.pi) * s)
# Information
def print_info(values, target=None):
    """Print the summary statistics of ``values``.

    Prints the mean, standard deviation and variance and, when ``target`` is
    given, the Gaussian value computed for it. The raw values are printed
    first when the module-level ``verbose`` flag is set. A blank line is
    printed at the end.

    Args:
        values: Values to summarise.
        target: Optional value for which the Gaussian is printed.
    """
    if verbose:
        print("Values: ", values)
    print("Mean:", mean(values), "| sdev:", sdev(values),
          "| Variance:", variance(values))
    # Identity test: a target of 0.0 is valid and must still be reported.
    if target is not None:
        print("Gaussian Probability for", target, ":", gaussian(target, values))
    print("")
# Results
separator = "--------------------------------------------------------------"
# The full data set is always reported.
print("ALL VALUES")
print(separator)
print_info(values, target)
# Each class subset is reported only when it has been defined.
for heading, subset in (("VALUES (class=yes)", values_yes),
                        ("VALUES (class=no)", values_no)):
    if subset is None:
        continue
    print(heading)
    print(separator)
    print_info(subset, target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment