Skip to content

Instantly share code, notes, and snippets.

@matmoody
Created April 20, 2016 03:52
Show Gist options
  • Save matmoody/5d4b89ce31bf16eadb249068365c791c to your computer and use it in GitHub Desktop.
Save matmoody/5d4b89ce31bf16eadb249068365c791c to your computer and use it in GitHub Desktop.
Overview of Stats
import pandas as pd
from scipy import stats
# Overview of basic descriptive statistics (mean, median, mode, range,
# variance, standard deviation) for a small table of regional alcohol and
# tobacco figures embedded below as CSV text.
data = '''Region,Alcohol,Tobacco
North,6.47,4.03
Yorkshire,6.13,3.76
Northeast,6.19,3.77
East Midlands,4.89,3.34
West Midlands,5.63,3.47
East Anglia,4.52,2.92
Southeast,5.89,3.20
Southwest,4.79,2.71
Wales,5.27,3.53
Scotland,6.08,4.51
Northern Ireland,4.02,4.56'''

# Split the CSV text into a list of rows, each row a list of string fields.
data = [line.split(',') for line in data.splitlines()]

# Create Pandas DataFrame
column_names = data[0]  # first row holds the header
data_rows = data[1:]  # all following rows hold the values
df = pd.DataFrame(data_rows, columns=column_names)

# The fields were parsed as strings; convert the numeric columns to floats.
df['Alcohol'] = df['Alcohol'].astype(float)
df['Tobacco'] = df['Tobacco'].astype(float)

# mean, median, and mode
alcohol_mean = df['Alcohol'].mean()
alcohol_median = df['Alcohol'].median()
# Series.mode() returns every most-frequent value in ascending order; taking
# the first one yields a single scalar (the smallest mode) that can be
# formatted with %f. scipy.stats.mode returns a (mode, count) result object
# instead, which cannot be passed to %f directly.
alcohol_mode = df['Alcohol'].mode().iloc[0]

tobacco_mean = df['Tobacco'].mean()
tobacco_median = df['Tobacco'].median()
tobacco_mode = df['Tobacco'].mode().iloc[0]

# range, variance, and std
alcohol_range = df['Alcohol'].max() - df['Alcohol'].min()
tobacco_range = df['Tobacco'].max() - df['Tobacco'].min()

alcohol_variance = df['Alcohol'].var()
tobacco_variance = df['Tobacco'].var()

alcohol_std = df['Alcohol'].std()
tobacco_std = df['Tobacco'].std()

# Report each statistic for Alcohol, then Tobacco. The parenthesised
# single-argument print form behaves the same on Python 2 and Python 3.
for _label, _alcohol_value, _tobacco_value in [
    ("mean", alcohol_mean, tobacco_mean),
    ("median", alcohol_median, tobacco_median),
    ("mode", alcohol_mode, tobacco_mode),
    ("range", alcohol_range, tobacco_range),
    ("variance", alcohol_variance, tobacco_variance),
    ("standard deviation", alcohol_std, tobacco_std),
]:
    print("The %s for the Alcohol dataset is %f" % (_label, _alcohol_value))
    print("The %s for the Tobacco dataset is %f" % (_label, _tobacco_value))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment