Created
February 21, 2016 00:48
-
-
Save Swarchal/6d3658d9907c31f6d315 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import pymc as pm | |
from scipy import stats | |
import seaborn as sns | |
sns.set_style('whitegrid') | |
# read in data | |
df = pd.read_csv('/home/scott/Dropbox/cell_data_TISS.csv') | |
# subset columns of interest | |
df_subset = df[['cell_AreaShape_Area', 'Metadata_compound', 'Metadata_concentration']] | |
# filter DMSO values | |
df_dmso = df_subset.query('Metadata_compound == "DMSO"') | |
# filter drug (dastanib) at 1uM values | |
df_drug = df_subset.query('Metadata_compound == "dasatinib" and Metadata_concentration == 1') | |
# calculate t-test | |
t_out, p_value = stats.ttest_ind(df_drug.cell_AreaShape_Area, df_dmso.cell_AreaShape_Area) | |
# cell areas | |
area_control = df_dmso.cell_AreaShape_Area | |
area_drug = df_drug.cell_AreaShape_Area | |
print 'mean DMSO:', area_control.mean() | |
print 'mean SN38:' , area_drug.mean() | |
print 'p-value: ', p_value | |
# plot boxplot | |
plt.figure(figsize=(4,4)) | |
ax = plt.subplot(111) | |
data = [area_control, area_drug] | |
plt.boxplot(data) | |
plt.xticks([1, 2], ['Control', 'Drug']) | |
plt.ylim(-50, 15000) | |
plt.ylabel('Cell area (pixels)') | |
plt.title('Difference in cell area') | |
plt.savefig('boxplot.png', bbox_inches = 'tight', pad_inches = 0.5) | |
# BEST (Bayesian estimation superceedes t-test) | |
# np.r_ concatenates the objects, think of c() in R | |
pooled_mean = np.r_[area_control, area_drug].mean() | |
pooled_std = np.r_[area_control, area_drug].std() | |
tau = 1 / np.sqrt(1000 * pooled_std) | |
mu_control = pm.Normal('mu_control', pooled_mean, tau) | |
mu_drug = pm.Normal('mu_drug', pooled_mean, tau) | |
std_control = pm.Uniform('std_control', pooled_std / 1000, 1000 * pooled_std) | |
std_drug = pm.Uniform('std_drug', pooled_std / 1000, 1000 * pooled_std) | |
nu_minus_1 = pm.Exponential('nu-1', 1 / 29) | |
obs_A = pm.NoncentralT('obs_A', mu_control, 1 / std_control**2, nu_minus_1 + 1, | |
observed = True, value = area_control) | |
obs_B = pm.NoncentralT('obs_B', mu_drug, 1 / std_drug**2, nu_minus_1 + 1, | |
observed = True, value = area_drug) | |
mcmc = pm.MCMC([obs_A, obs_B, mu_control, mu_drug, std_control, std_drug, nu_minus_1]) | |
mcmc.sample(50000, 10000) | |
mu_control_trace, mu_drug_trace = mcmc.trace('mu_control')[:], mcmc.trace('mu_drug')[:] | |
std_control_trace, std_drug_trace = mcmc.trace('std_control')[:], mcmc.trace('std_drug')[:] | |
nu_trace = mcmc.trace('nu-1')[:] + 1 | |
# determining the difference | |
def relative_increase(a, b): | |
return abs(a - b) | |
posterior_rel_increase = relative_increase(mu_control_trace, mu_drug_trace) | |
# plot distributions from BEST | |
def _hist(data, label, **kwargs): | |
return plt.hist(data, bins = 40, histtype = 'stepfilled', | |
alpha = 0.75, label = label, **kwargs) | |
plt.figure(figsize=(12, 12)) | |
ax = plt.subplot(311) | |
_hist(mu_control_trace, 'Control', facecolor = 'darkorange') | |
_hist(mu_drug_trace, 'Drug', facecolor = 'cornflowerblue') | |
plt.legend() | |
plt.title(r'Posterior distributions of $\mu$') | |
ax = plt.subplot(312) | |
_hist(std_control_trace, 'Control', facecolor = 'darkorange') | |
_hist(std_drug_trace, 'Drug', facecolor = 'cornflowerblue') | |
plt.ylabel('density') | |
plt.legend() | |
plt.title(r'Posterior distributions of $\sigma$') | |
ax = plt.subplot(313) | |
plt.title("Posterior distribution of the difference") | |
_hist(posterior_rel_increase, 'relative increase', color = '#7A68A6') | |
plt.xlabel('cell area (pixels)') | |
plt.savefig('bayes_cells.png', bbox_inches = 'tight', pad_inches = 0.5) | |
# histogram of the raw cell areas | |
plt.figure(figsize=(8, 6)) | |
ax = plt.subplot(111) | |
n, bins, patches = plt.hist(area_control, 100, normed = 1, label = 'Control', | |
facecolor = 'darkorange', alpha = 0.75) | |
n, bins, patches = plt.hist(area_drug, 100, normed = 1, label = 'Drug', | |
facecolor = 'cornflowerblue', alpha = 0.75) | |
plt.xlim(0, 20000) | |
plt.xlabel('Cell area (pixels)') | |
plt.ylabel('Density') | |
plt.legend() | |
plt.title('Histogram of cell area') | |
plt.savefig('Difference.png', bbox_inches = 'tight', pad_inches = 0.5) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment