Created
November 9, 2017 18:55
-
-
Save jmuhlich/e8d5ce02df6d3cb2db0c2bd6acaa8d6d to your computer and use it in GitHub Desktop.
Loading, tidying and plotting CycIF data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools

import pandas as pd

# Build the experimental-design table for the plate: one row per combination
# of well row, well column, field, compartment, channel and cycle, annotated
# with derived Well / ChannelCycle labels and the Day, Treatment and Marker
# that apply to it.

# Create lists with the different factors.
rows = ['B', 'C', 'D', 'E', 'F', 'G']
columns = range(2, 10 + 1)
fields = range(1, 12 + 1)
compartments = ['cyto', 'nuc']
channels = ['DAPI', 'Cy3', 'Cy5', 'FITC']
# Cycles are numbered from zero so that the zero-padded ChannelCycle labels
# built below ('<channel>-0000', '<channel>-0001') are exactly the labels the
# Marker rules at the bottom refer to. With [1, 2] the labels were
# '-0001'/'-0002', so every 'Cy5-0000' rule matched nothing.
# NOTE(review): confirm this matches the cycle numbering used at acquisition.
cycles = [0, 1]

# Generate a dataframe with all combinations of all factors (Cartesian product).
design = pd.DataFrame(
    list(itertools.product(
        rows, columns, fields, compartments, channels, cycles
    )),
    columns=['Row', 'Column', 'Field', 'Compartment', 'Channel', 'Cycle'],
)

# Add some helpful columns that are made up by combining other factors.
# Well is the row letter plus the two-digit column number, e.g. 'B02'.
design['Well'] = design['Row'] + design['Column'].astype(str).str.zfill(2)
# ChannelCycle is the channel name plus the four-digit cycle, e.g. 'Cy5-0001'.
design['ChannelCycle'] = (
    design['Channel'] + '-' + design['Cycle'].astype(str).str.zfill(4)
)

# Fill in the Day (timepoint) based on a pattern on Row.
design.loc[design['Row'].isin(['B', 'C', 'D']), 'Day'] = 3
design.loc[design['Row'].isin(['E', 'F', 'G']), 'Day'] = 2

# Fill in the Treatment by applying the below "matrix" over patterns on Row and
# Column.
design['Treatment'] = None
trt_matrix = [
    ['DMSO', 'Fulv', 'Alp'],
    ['Tram', 'Fulv+Alp', 'Fulv+Tram'],
    ['Alp+Tram', 'Fulv+Alp+Tram', 'DMSO'],
]
tm_cols = [[2, 5, 8], [3, 6, 9], [4, 7, 10]]
tm_rows = [['B', 'E'], ['C', 'F'], ['D', 'G']]
# The loop variables are deliberately not called `rows` / `columns`, so the
# factor lists defined at the top keep their values after this loop.
# The matrix is indexed [column group][row group].
for c, col_group in enumerate(tm_cols):
    for r, row_group in enumerate(tm_rows):
        idx = design['Column'].isin(col_group) & design['Row'].isin(row_group)
        design.loc[idx, 'Treatment'] = trt_matrix[c][r]

# Fill in the Marker by applying various patterns on Column.
# The Series holds strings, so give it an explicit object dtype instead of
# relying on the version-dependent default for a data-less Series.
markers = pd.Series(index=design.index, dtype=object)
markers.loc[design['Channel'] == 'DAPI'] = 'DAPI'
markers.loc[design['ChannelCycle'] == 'Cy3-0001'] = 'pRB'
# (ChannelCycle label, plate columns, marker) rules; combinations that are not
# listed here keep a missing Marker.
marker_rules = [
    ('Cy5-0000', [2, 3, 4], 'cMyc'),
    ('Cy5-0000', [5, 6, 7], 'pStat3'),
    ('Cy5-0000', [8, 9, 10], 'cJun'),
    ('Cy5-0001', [2, 3, 4], 'p21'),
    ('Cy5-0001', [5, 6, 7], 'NFkB'),
    ('FITC-0001', [2, 3, 4], 'CyclinD'),
    ('FITC-0001', [5, 6, 7], 'ERa'),
    ('FITC-0001', [8, 9, 10], 'pS6-240'),
]
for channel_cycle, plate_columns, marker in marker_rules:
    match = (
        (design['ChannelCycle'] == channel_cycle)
        & design['Column'].isin(plate_columns)
    )
    markers.loc[match] = marker
design['Marker'] = markers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the per-object measurement tables, split the Label column into its
# components, and draw a handful of exploratory plots for selected wells.

# Load all Results-Cyto-*.txt files into a single dataframe.
# Sorting the paths makes the row order independent of directory listing order.
paths = sorted(glob.glob('Results-Cyto-*.txt'))
if not paths:
    # Fail with a clear message instead of pd.concat's generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(
        "No files matching 'Results-Cyto-*.txt' in the current directory"
    )
df = pd.concat([pd.read_table(path, index_col=0) for path in paths])

# Parse the Label column into its components, in a new dataframe.
label_info = df.Label.str.extract(r'([A-Z])(\d\d)_fld(\d+):(\d+)-(\d+):(.*)',
                                  expand=True)
label_info.columns = ['Row', 'Column', 'Field', 'ObjNum', 'Unknown', 'Channel']
# Build Well (e.g. 'B06') while Row is still a letter and Column is still the
# two-digit string, i.e. before the numeric conversions below.
label_info['Well'] = label_info['Row'] + label_info['Column']
# Convert the row letter to a 1-based number (A -> 1, B -> 2, ...).
label_info['Row'] = label_info.Row.map(ord) - ord('A') + 1
for f in ('Row', 'Column', 'Field', 'ObjNum', 'Unknown'):
    label_info[f] = label_info[f].astype(int)

# Append these new columns to the data.
dfl = pd.concat([label_info, df], axis=1)

# Extract the metrics from the DAPI channel in cycle 1, from well B06.
well = dfl[(dfl['Well'] == 'B06') & (dfl['Channel'] == 'DAPI-0001')]
# Extract field 1 only.
well_field = well[well['Field'] == 1]

# Plot X vs Y for field 1, visualizing the individual cell positions.
well_field.plot.scatter(x='X', y='Y')

# Plot X vs Y for all fields.
# x, y and data are passed by keyword: recent seaborn releases no longer
# accept them positionally, and the keyword form also works on older ones.
sns.lmplot(x='X', y='Y', data=well, col='Field', col_wrap=4, fit_reg=False)

# Plot fitted ellipse major vs minor axes, with a linear fit and confidence
# interval.
sns.lmplot(x='Major', y='Minor', data=well, col='Field', col_wrap=4)

# Extract the "Mean" metric from all channels for all fields in well B06, and
# pivot the result so that each channel becomes its own column.
wmean = pd.pivot_table(
    dfl[dfl['Well'] == 'B06'],
    values='Mean', index=['Field', 'ObjNum'], columns='Channel',
)

# Show scatter plots for all pairwise channel combinations.
sns.pairplot(wmean, diag_kind='kde',
             plot_kws={'edgecolor': 'none', 's': 1, 'alpha': .3})
plt.gcf().subplots_adjust(bottom=0.05, left=0.05)

# Same as above, except for three wells, each in a different color.
wm3 = pd.pivot_table(
    dfl[dfl['Well'].isin(['B02', 'E06', 'G10'])],
    values='Mean', index=['Well', 'Field', 'ObjNum'], columns='Channel',
).reset_index('Well')
# NOTE(review): 'histtype' and 'bins' are matplotlib histogram options; newer
# seaborn releases may draw the diagonal with a different function when `hue`
# is set and reject them -- confirm against the installed seaborn version.
sns.pairplot(wm3, hue='Well',
             plot_kws={'edgecolor': 'none', 's': 1, 'alpha': .3},
             diag_kws={'histtype': 'step', 'bins': 30})
plt.gcf().subplots_adjust(bottom=0.05, left=0.05)

plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment