Skip to content

Instantly share code, notes, and snippets.

@jmuhlich
Created November 9, 2017 18:55
Show Gist options
  • Save jmuhlich/e8d5ce02df6d3cb2db0c2bd6acaa8d6d to your computer and use it in GitHub Desktop.
Save jmuhlich/e8d5ce02df6d3cb2db0c2bd6acaa8d6d to your computer and use it in GitHub Desktop.
Loading, tidying and plotting CycIF data
import itertools
import pandas as pd
# Build a tidy "design" table describing every measurement of the plate
# layout: one row per combination of Row, Column, Field, Compartment, Channel
# and Cycle, annotated with Well, ChannelCycle, Day, Treatment and Marker.

# Create lists with the different factors.
rows = ['B', 'C', 'D', 'E', 'F', 'G']
columns = range(2, 10 + 1)
fields = range(1, 12 + 1)
compartments = ['cyto', 'nuc']
channels = ['DAPI', 'Cy3', 'Cy5', 'FITC']
cycles = [1, 2]

# Generate a dataframe with all combinations of all factors (Cartesian product).
design = pd.DataFrame(
    list(itertools.product(
        rows, columns, fields, compartments, channels, cycles
    )),
    columns=['Row', 'Column', 'Field', 'Compartment', 'Channel', 'Cycle'],
)

# Add some helpful columns that are made up by combining other factors.
# Well is the row letter followed by the zero-padded two-digit column ("B02").
design['Well'] = design['Row'] + design['Column'].astype(str).str.zfill(2)
# ChannelCycle is the channel name, a dash and the zero-padded four-digit
# cycle number ("DAPI-0001").
design['ChannelCycle'] = (
    design['Channel'] + '-' + design['Cycle'].astype(str).str.zfill(4)
)

# Fill in the Day (timepoint) based on a pattern on Row.
design.loc[design['Row'].isin(['B', 'C', 'D']), 'Day'] = 3
design.loc[design['Row'].isin(['E', 'F', 'G']), 'Day'] = 2

# Fill in the Treatment by applying the below "matrix" over patterns on Row and
# Column.
design['Treatment'] = None
trt_matrix = [
    ['DMSO', 'Fulv', 'Alp'],
    ['Tram', 'Fulv+Alp', 'Fulv+Tram'],
    ['Alp+Tram', 'Fulv+Alp+Tram', 'DMSO'],
]
tm_cols = [[2, 5, 8], [3, 6, 9], [4, 7, 10]]
tm_rows = [['B', 'E'], ['C', 'F'], ['D', 'G']]
# The loop variables deliberately do not reuse the names `columns` / `rows`,
# so the factor lists defined at the top keep their full contents.
# The first index of trt_matrix selects the column group, the second the row
# group.
for c, col_group in enumerate(tm_cols):
    for r, row_group in enumerate(tm_rows):
        idx = design['Column'].isin(col_group) & design['Row'].isin(row_group)
        design.loc[idx, 'Treatment'] = trt_matrix[c][r]

# Fill in the Marker by applying various patterns on Channel, ChannelCycle and
# Column.
# dtype=object is given explicitly because the series is filled with strings
# and the default dtype of an empty Series differs between pandas versions.
markers = pd.Series(index=design.index, dtype=object)
markers.loc[design['Channel'] == 'DAPI'] = 'DAPI'
markers.loc[design['ChannelCycle'] == 'Cy3-0001'] = 'pRB'
# NOTE(review): with cycles = [1, 2] ChannelCycle only ever ends in -0001 or
# -0002, so the three 'Cy5-0000' masks below select no rows. Confirm whether
# the cycle numbering was meant to be zero-based.
markers.loc[(design['ChannelCycle'] == 'Cy5-0000') & design['Column'].isin([2, 3, 4])] = 'cMyc'
markers.loc[(design['ChannelCycle'] == 'Cy5-0000') & design['Column'].isin([5, 6, 7])] = 'pStat3'
markers.loc[(design['ChannelCycle'] == 'Cy5-0000') & design['Column'].isin([8, 9, 10])] = 'cJun'
markers.loc[(design['ChannelCycle'] == 'Cy5-0001') & design['Column'].isin([2, 3, 4])] = 'p21'
markers.loc[(design['ChannelCycle'] == 'Cy5-0001') & design['Column'].isin([5, 6, 7])] = 'NFkB'
markers.loc[(design['ChannelCycle'] == 'FITC-0001') & design['Column'].isin([2, 3, 4])] = 'CyclinD'
markers.loc[(design['ChannelCycle'] == 'FITC-0001') & design['Column'].isin([5, 6, 7])] = 'ERa'
markers.loc[(design['ChannelCycle'] == 'FITC-0001') & design['Column'].isin([8, 9, 10])] = 'pS6-240'
# Rows that match none of the patterns above keep a missing Marker.
design['Marker'] = markers
import glob
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load all Results-Cyto-*.txt files from the current directory into a single
# dataframe, using the first column of each file as the index.
# glob.glob() returns matches in arbitrary order, so the paths are sorted to
# keep the row order of the combined dataframe reproducible.
paths = sorted(glob.glob('Results-Cyto-*.txt'))
if not paths:
    # pd.concat would otherwise fail with a generic "No objects to
    # concatenate" ValueError; name the missing input instead.
    raise ValueError("No files matching 'Results-Cyto-*.txt' were found")
df = pd.concat([pd.read_table(path, index_col=0) for path in paths])
# Split every Label string into its parts and collect them in a separate
# dataframe that shares its index with df.
label_pattern = r'([A-Z])(\d\d)_fld(\d+):(\d+)-(\d+):(.*)'
label_info = df['Label'].str.extract(label_pattern, expand=True)
label_info.columns = ['Row', 'Column', 'Field', 'ObjNum', 'Unknown', 'Channel']
# Build Well from the still-textual parts (row letter + two-digit column)
# before they are converted to numbers.
label_info['Well'] = label_info['Row'] + label_info['Column']
# Turn the row letter into a 1-based row number ('A' -> 1, 'B' -> 2, ...).
label_info['Row'] = label_info['Row'].map(ord) - ord('A') + 1
# Cast all numeric label parts to integers in one step.
numeric_fields = ['Row', 'Column', 'Field', 'ObjNum', 'Unknown']
label_info = label_info.astype({name: int for name in numeric_fields})
# Put the parsed label columns in front of the original measurement columns.
dfl = pd.concat([label_info, df], axis=1)
# Extract the metrics from the 'DAPI-0001' channel (DAPI, cycle 1) of well B06.
well = dfl[(dfl['Well'] == 'B06') & (dfl['Channel'] == 'DAPI-0001')]
# Extract field 1 only.
well_field = well[well['Field'] == 1]
# Plot X vs Y for field 1, visualizing the individual cell positions.
well_field.plot.scatter('X', 'Y')

# Plot X vs Y for all fields, one panel per field and without a regression
# line. x, y and data are passed by keyword because their positional order
# differs between seaborn releases.
sns.lmplot(x='X', y='Y', data=well, col='Field', col_wrap=4, fit_reg=False)
# Plot fitted ellipse major vs minor axes, with a linear fit and confidence
# interval.
sns.lmplot(x='Major', y='Minor', data=well, col='Field', col_wrap=4)

# Extract the "Mean" metric from all channels for all fields in well B06, and
# pivot the result so that each channel becomes its own column.
wmean = pd.pivot_table(dfl[dfl['Well'] == 'B06'],
                       'Mean', ['Field', 'ObjNum'], 'Channel')
# Show scatter plots for all pairwise channel combinations.
sns.pairplot(wmean, diag_kind='kde',
             plot_kws={'edgecolor': 'none', 's': 1, 'alpha': .3})
plt.gcf().subplots_adjust(bottom=0.05, left=0.05)

# Same as above, except for three wells, each in a different color.
wm3 = pd.pivot_table(
    dfl[dfl['Well'].isin(['B02', 'E06', 'G10'])],
    'Mean', ['Well', 'Field', 'ObjNum'], 'Channel'
).reset_index('Well')
# NOTE(review): diag_kws holds matplotlib histogram options ('histtype',
# 'bins'); newer seaborn releases may draw a different kind of diagonal plot
# when hue is set and reject them -- confirm against the installed version.
sns.pairplot(wm3, hue='Well',
             plot_kws={'edgecolor': 'none', 's': 1, 'alpha': .3},
             diag_kws={'histtype': 'step', 'bins': 30})
plt.gcf().subplots_adjust(bottom=0.05, left=0.05)

plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment