Created
February 22, 2017 02:32
-
-
Save BioSciEconomist/904ace29e204ca5308ade5ed0cd68920 to your computer and use it in GitHub Desktop.
Basic Statistics in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------------------------------------------------
# PROGRAM NAME: python basic stats.py
# DATE: 2/20/16
# CREATED BY: MATT BOGARD
# PROJECT FILE:
#----------------------------------------------------------------
# PURPOSE: BASIC STATS IN PYTHON
#---------------------------------------------------------------
import pandas as pd  # primary package for data manipulation

# Make a data frame manually.
# Four numeric columns (GARST, PIO, MYC, DEK), a PLOT id running 1..10,
# and two 'Y'/'N' flag columns (BT, RR). Every column has ten rows.
data = {
    'GARST': [150, 140, 145, 137, 141, 145, 149, 153, 157, 161],
    'PIO': [160, 150, 146, 138, 142, 146, 150, 154, 158, 162],
    'MYC': [137, 148, 151, 139, 143, 120, 115, 136, 130, 129],
    'DEK': [150, 149, 145, 140, 144, 148, 152, 156, 160, 164],
    'PLOT': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'BT': ['Y', 'Y', 'N', 'N', 'N', 'N', 'Y', 'N', 'Y', 'Y'],
    'RR': ['Y', 'N', 'Y', 'N', 'N', 'N', 'N', 'Y', 'Y', 'N'],
}

# Passing `columns` explicitly pins the column order of the frame.
yield_data = pd.DataFrame(
    data,
    columns=['GARST', 'PIO', 'MYC', 'DEK', 'PLOT', 'BT', 'RR'],
)
# Create the GMO trait field.
#
# Each assignment goes through a single `.loc[row_mask, 'GMO']` call.
# The chained form `df['GMO'][mask] = value` may write to a temporary
# copy (SettingWithCopyWarning) and is a no-op under pandas
# Copy-on-Write, so it must not be used here.
#
# The label strings (including their trailing spaces) are kept exactly
# as originally written, because later group-by output is keyed on them.

# Set a default value for every row.
yield_data['GMO'] = 'Non-GMO '

# Rows carrying the RR trait.
yield_data.loc[yield_data['RR'] == 'Y', 'GMO'] = 'Single Trait '

# Rows carrying the BT trait.
yield_data.loc[yield_data['BT'] == 'Y', 'GMO'] = 'Single Trait '

# Rows carrying both traits; this runs last so that it overrides the
# single-trait label assigned above.
yield_data.loc[
    (yield_data['BT'] == 'Y') & (yield_data['RR'] == 'Y'), 'GMO'
] = 'Stacked Trait'
#----------------------------------
# descriptive statistics
#----------------------------------
# All of these come from pandas. Results are printed so they are
# visible when the file is run as a script, not only when lines are
# evaluated interactively.

# Descriptives for the numeric columns.
print(yield_data.describe())

# Descriptives by group.
# numeric_only=True restricts the mean to numeric columns; without it,
# pandas >= 2.0 raises a TypeError on the string columns (BT, RR).
groupby_gmo = yield_data.groupby('GMO')
print(groupby_gmo.mean(numeric_only=True))

# Frequencies of each GMO label, ordered by label.
print(yield_data.GMO.value_counts().sort_index())
#------------------------------
# graphics
#------------------------------
import numpy as np
import matplotlib.pyplot as plt

# Histogram of GARST in three bins. `hist` must be called through the
# `plt` namespace: a bare `hist(...)` only exists in pylab-style
# interactive sessions and is a NameError in a plain script.
plt.hist(yield_data['GARST'], bins=3, color='green')

# Scatter plot of PIO (y axis) against GARST (x axis).
yield_data.plot(x='GARST', y='PIO', kind='scatter')
#-----------------------------------
# t-tests
#-----------------------------------
from scipy import stats

# Two-sample t-test of GARST vs PIO (equal variances assumed by default).
# The printed result gives the t-statistic and the p-value.
print(stats.ttest_ind(yield_data.GARST, yield_data.PIO))

# Welch's two-sample t-test (unequal variances).
print(stats.ttest_ind(yield_data.GARST, yield_data.PIO, equal_var=False))

# Run as if this were a paired t-test (rows treated as matched pairs).
print(stats.ttest_rel(yield_data.GARST, yield_data.PIO))
#-----------------------------------
# correlations
#-----------------------------------
# From pandas: correlation matrix of the GARST and PIO columns.
print(yield_data[['GARST', 'PIO']].corr())
#-----------------------------------
# regression
#-----------------------------------
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Regression using the formula interface (smf): GARST regressed on PIO.
results = smf.ols('GARST ~ PIO', data=yield_data).fit()

# Inspect results. print is called as a function, which is required on
# Python 3 and matches the print call used in the correlations section.
print(results.summary())

# The same regression using the array interface (sm).
X = yield_data[['PIO']]
y = yield_data[['GARST']]
print(X.head())
print(y.head())

# Fit an OLS model of GARST on PIO with an intercept. sm.OLS does not
# add an intercept by itself, so a constant column is added to X first.
X = sm.add_constant(X)
print(X.head())
est = sm.OLS(y, X).fit()
print(est.summary())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment