Created
April 20, 2016 03:52
-
-
Save matmoody/5d4b89ce31bf16eadb249068365c791c to your computer and use it in GitHub Desktop.
Overview of Stats
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Overview of stats: load a small per-region Alcohol/Tobacco table into a
# pandas DataFrame and print basic descriptive statistics (mean, median,
# mode, range, variance, standard deviation) for each numeric column.
import numpy as np
import pandas as pd
from scipy import stats

data = '''Region,Alcohol,Tobacco
North,6.47,4.03
Yorkshire,6.13,3.76
Northeast,6.19,3.77
East Midlands,4.89,3.34
West Midlands,5.63,3.47
East Anglia,4.52,2.92
Southeast,5.89,3.20
Southwest,4.79,2.71
Wales,5.27,3.53
Scotland,6.08,4.51
Northern Ireland,4.02,4.56'''

# Split the CSV text into rows, then each row into its comma-separated fields.
data = [line.split(',') for line in data.splitlines()]

# Create Pandas DataFrame
column_names = data[0]  # first row is the header
data_rows = data[1:]    # all following rows are records
df = pd.DataFrame(data_rows, columns=column_names)

# Every field was parsed as a string; convert the numeric columns to float.
df['Alcohol'] = df['Alcohol'].astype(float)
df['Tobacco'] = df['Tobacco'].astype(float)


def _mode_value(series):
    """Return the modal value of *series* as a plain float.

    ``scipy.stats.mode`` returns a ``(mode, count)`` result, not a number,
    so it cannot be passed to a ``%f`` format directly.  Depending on the
    SciPy version the ``mode`` field is either a scalar or a length-1
    array; ``np.ravel`` normalises both shapes before the value is taken.
    When several values tie for most common, SciPy reports the smallest.
    """
    result = stats.mode(series.to_numpy())
    return float(np.ravel(result.mode)[0])


# Central tendency
alcohol_mean = df['Alcohol'].mean()
alcohol_median = df['Alcohol'].median()
alcohol_mode = _mode_value(df['Alcohol'])

tobacco_mean = df['Tobacco'].mean()
tobacco_median = df['Tobacco'].median()
tobacco_mode = _mode_value(df['Tobacco'])

# Range, variance, and std (pandas uses the sample estimators, ddof=1)
alcohol_range = df['Alcohol'].max() - df['Alcohol'].min()
tobacco_range = df['Tobacco'].max() - df['Tobacco'].min()

alcohol_variance = df['Alcohol'].var()
tobacco_variance = df['Tobacco'].var()

alcohol_std = df['Alcohol'].std()
tobacco_std = df['Tobacco'].std()

# One entry per statistic: (label, Alcohol value, Tobacco value), listed in
# the order the report is printed.
summary = [
    ("mean", alcohol_mean, tobacco_mean),
    ("median", alcohol_median, tobacco_median),
    ("mode", alcohol_mode, tobacco_mode),
    ("range", alcohol_range, tobacco_range),
    ("variance", alcohol_variance, tobacco_variance),
    ("standard deviation", alcohol_std, tobacco_std),
]

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
for label, alcohol_value, tobacco_value in summary:
    print("The %s for the Alcohol dataset is %f" % (label, alcohol_value))
    print("The %s for the Tobacco dataset is %f" % (label, tobacco_value))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment