jmuhlich · November 9, 2017 18:55
diff --git a/make_design.py b/make_design.py
 import itertools
 import pandas as pd

 # Build the experimental design table: one row for every combination of
 # plate row, plate column, field, compartment, channel and cycle, plus
 # derived Well, ChannelCycle, Day, Treatment and Marker columns.

 # Create lists with the different factors.
 rows = ['B', 'C', 'D', 'E', 'F', 'G']
 columns = range(2, 10 + 1)
 fields = range(1, 12 + 1)
 compartments = ['cyto', 'nuc']
 channels = ['DAPI', 'Cy3', 'Cy5', 'FITC']
 cycles = [1, 2]
 # Generate a dataframe with all combinations of all factors (Cartesian product).
 design = pd.DataFrame(
     list(itertools.product(
         rows, columns, fields, compartments, channels, cycles
     )),
     columns=['Row', 'Column', 'Field', 'Compartment', 'Channel', 'Cycle'],
 )

 # Add some helpful columns that are made up by combining other factors.
 # Well is the row letter followed by the zero-padded two-digit column.
 design['Well'] = design['Row'] + design['Column'].astype(str).str.zfill(2)
 # ChannelCycle is the channel name, a dash, and the four-digit cycle number.
 design['ChannelCycle'] = (
     design['Channel'] + '-' + design['Cycle'].astype(str).str.zfill(4)
 )

 # Fill in the Day (timepoint) based on a pattern on Row.
 design.loc[design['Row'].isin(['B', 'C', 'D']), 'Day'] = 3
 design.loc[design['Row'].isin(['E', 'F', 'G']), 'Day'] = 2

 # Fill in the Treatment by applying the below "matrix" over patterns on Row and
 # Column. The outer index of trt_matrix selects a column group from tm_cols
 # and the inner index selects a row group from tm_rows.
 design['Treatment'] = None
 trt_matrix = [
     ['DMSO', 'Fulv', 'Alp'],
     ['Tram', 'Fulv+Alp', 'Fulv+Tram'],
     ['Alp+Tram', 'Fulv+Alp+Tram', 'DMSO'],
 ]
 tm_cols = [[2, 5, 8], [3, 6, 9], [4, 7, 10]]
 tm_rows = [['B', 'E'], ['C', 'F'], ['D', 'G']]
 # Loop variables are named *_group so they do not overwrite the `rows` and
 # `columns` factor lists defined above.
 for c, col_group in enumerate(tm_cols):
     for r, row_group in enumerate(tm_rows):
         idx = design['Column'].isin(col_group) & design['Row'].isin(row_group)
         design.loc[idx, 'Treatment'] = trt_matrix[c][r]


 # Fill in the Marker by applying various patterns on Column.
 # dtype=object because the series holds marker names (strings); rows that
 # match no rule stay missing.
 markers = pd.Series(index=design.index, dtype=object)

 markers.loc[design['Channel'] == 'DAPI'] = 'DAPI'

 markers.loc[design['ChannelCycle'] == 'Cy3-0001'] = 'pRB'

 # NOTE(review): cycles is [1, 2], so ChannelCycle only ever ends in -0001 or
 # -0002 and the 'Cy5-0000' rules below match no rows as written. Confirm
 # whether the cycle numbering or these labels should be shifted.
 marker_rules = [
     ('Cy5-0000', [2, 3, 4], 'cMyc'),
     ('Cy5-0000', [5, 6, 7], 'pStat3'),
     ('Cy5-0000', [8, 9, 10], 'cJun'),
     ('Cy5-0001', [2, 3, 4], 'p21'),
     ('Cy5-0001', [5, 6, 7], 'NFkB'),
     ('FITC-0001', [2, 3, 4], 'CyclinD'),
     ('FITC-0001', [5, 6, 7], 'ERa'),
     ('FITC-0001', [8, 9, 10], 'pS6-240'),
 ]
 for channel_cycle, marker_cols, marker in marker_rules:
     rule_mask = (
         (design['ChannelCycle'] == channel_cycle)
         & design['Column'].isin(marker_cols)
     )
     markers.loc[rule_mask] = marker

 design['Marker'] = markers
diff --git a/plot.py b/plot.py
 import glob
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt

 # Load the per-object measurement tables, parse the Label column into plate
 # coordinates, and draw a few exploratory plots.

 # Load all Results-Cyto-*.txt files into a single dataframe. Paths are sorted
 # so the row order does not depend on the order glob happens to return.
 result_paths = sorted(glob.glob('Results-Cyto-*.txt'))
 if not result_paths:
     # pd.concat would raise an opaque "No objects to concatenate" here.
     raise ValueError("no files match 'Results-Cyto-*.txt'")
 df = pd.concat([
     pd.read_table(path, index_col=0)
     for path in result_paths
 ])

 # Parse the Label column into its components, in a new dataframe. The pattern
 # captures: a capital letter, two digits, the digits after "_fld", two
 # dash-separated numbers, and everything after the final colon.
 label_info = df.Label.str.extract(r'([A-Z])(\d\d)_fld(\d+):(\d+)-(\d+):(.*)',
                                   expand=True)
 label_info.columns = ['Row', 'Column', 'Field', 'ObjNum', 'Unknown', 'Channel']
 # Build Well while Row and Column are still strings (e.g. 'B' + '06').
 label_info['Well'] = label_info['Row'] + label_info['Column']
 # Convert the row letter to a 1-based number (A -> 1, B -> 2, ...).
 label_info['Row'] = (label_info.Row.map(ord) - ord('A') + 1)
 for field_name in 'Row', 'Column', 'Field', 'ObjNum', 'Unknown':
     label_info[field_name] = label_info[field_name].astype(int)
 # Append these new columns to the data.
 dfl = pd.concat([label_info, df], axis=1)

 # Extract the metrics from the DAPI channel in cycle 1, from well B06.
 well = dfl[(dfl['Well'] == 'B06') & (dfl['Channel'] == 'DAPI-0001')]
 # Extract field 1 only.
 well_field = well[well['Field'] == 1]

 # Plot X vs Y for field 1, visualizing the individual cell positions.
 well_field.plot.scatter(x='X', y='Y')
 # Plot X vs Y for all fields. x/y/data are passed by keyword because current
 # seaborn releases no longer accept them positionally.
 sns.lmplot(x='X', y='Y', data=well, col='Field', col_wrap=4, fit_reg=False)
 # Plot fitted ellipse major vs minor axes, with a linear fit and confidence
 # interval.
 sns.lmplot(x='Major', y='Minor', data=well, col='Field', col_wrap=4)

 # Extract the "Mean" metric from all channels for all fields in well B06, and
 # pivot the result so that each channel becomes its own column.
 wmean = pd.pivot_table(dfl[dfl['Well'] == 'B06'],
                        values='Mean', index=['Field', 'ObjNum'],
                        columns='Channel')
 # Show scatter plots for all pairwise channel combinations.
 sns.pairplot(wmean, diag_kind='kde',
              plot_kws={'edgecolor': 'none', 's': 1, 'alpha': .3})
 plt.gcf().subplots_adjust(bottom=0.05, left=0.05)

 # Same as above, except for three wells, each in a different color.
 wm3 = pd.pivot_table(
     dfl[dfl['Well'].isin(['B02', 'E06', 'G10'])],
     values='Mean', index=['Well', 'Field', 'ObjNum'], columns='Channel'
 ).reset_index('Well')
 # NOTE(review): diag_kws below are matplotlib hist() options; newer seaborn
 # releases may need an explicit diag_kind and different keywords -- confirm
 # against the installed version.
 sns.pairplot(wm3, hue='Well',
              plot_kws={'edgecolor': 'none', 's': 1, 'alpha': .3},
              diag_kws={'histtype': 'step', 'bins': 30})
 plt.gcf().subplots_adjust(bottom=0.05, left=0.05)

 plt.show()
	import itertools
	import pandas as pd

	# Build the experimental design table: one row for every combination of
	# plate row, plate column, field, compartment, channel and cycle, plus
	# derived Well, ChannelCycle, Day, Treatment and Marker columns.

	# Create lists with the different factors.
	rows = ['B', 'C', 'D', 'E', 'F', 'G']
	columns = range(2, 10 + 1)
	fields = range(1, 12 + 1)
	compartments = ['cyto', 'nuc']
	channels = ['DAPI', 'Cy3', 'Cy5', 'FITC']
	cycles = [1, 2]
	# Generate a dataframe with all combinations of all factors (Cartesian product).
	design = pd.DataFrame(
	    list(itertools.product(
	        rows, columns, fields, compartments, channels, cycles
	    )),
	    columns=['Row', 'Column', 'Field', 'Compartment', 'Channel', 'Cycle'],
	)

	# Add some helpful columns that are made up by combining other factors.
	# Well is the row letter followed by the zero-padded two-digit column.
	design['Well'] = design['Row'] + design['Column'].astype(str).str.zfill(2)
	# ChannelCycle is the channel name, a dash, and the four-digit cycle number.
	design['ChannelCycle'] = (
	    design['Channel'] + '-' + design['Cycle'].astype(str).str.zfill(4)
	)

	# Fill in the Day (timepoint) based on a pattern on Row.
	design.loc[design['Row'].isin(['B', 'C', 'D']), 'Day'] = 3
	design.loc[design['Row'].isin(['E', 'F', 'G']), 'Day'] = 2

	# Fill in the Treatment by applying the below "matrix" over patterns on Row and
	# Column. The outer index of trt_matrix selects a column group from tm_cols
	# and the inner index selects a row group from tm_rows.
	design['Treatment'] = None
	trt_matrix = [
	    ['DMSO', 'Fulv', 'Alp'],
	    ['Tram', 'Fulv+Alp', 'Fulv+Tram'],
	    ['Alp+Tram', 'Fulv+Alp+Tram', 'DMSO'],
	]
	tm_cols = [[2, 5, 8], [3, 6, 9], [4, 7, 10]]
	tm_rows = [['B', 'E'], ['C', 'F'], ['D', 'G']]
	# The loop bodies are nested explicitly (this copy had lost its
	# indentation). Loop variables are named *_group so they do not overwrite
	# the `rows` and `columns` factor lists defined above.
	for c, col_group in enumerate(tm_cols):
	    for r, row_group in enumerate(tm_rows):
	        idx = design['Column'].isin(col_group) & design['Row'].isin(row_group)
	        design.loc[idx, 'Treatment'] = trt_matrix[c][r]


	# Fill in the Marker by applying various patterns on Column.
	# dtype=object because the series holds marker names (strings); rows that
	# match no rule stay missing.
	markers = pd.Series(index=design.index, dtype=object)

	markers.loc[design['Channel'] == 'DAPI'] = 'DAPI'

	markers.loc[design['ChannelCycle'] == 'Cy3-0001'] = 'pRB'

	# NOTE(review): cycles is [1, 2], so ChannelCycle only ever ends in -0001 or
	# -0002 and the 'Cy5-0000' rules below match no rows as written. Confirm
	# whether the cycle numbering or these labels should be shifted.
	marker_rules = [
	    ('Cy5-0000', [2, 3, 4], 'cMyc'),
	    ('Cy5-0000', [5, 6, 7], 'pStat3'),
	    ('Cy5-0000', [8, 9, 10], 'cJun'),
	    ('Cy5-0001', [2, 3, 4], 'p21'),
	    ('Cy5-0001', [5, 6, 7], 'NFkB'),
	    ('FITC-0001', [2, 3, 4], 'CyclinD'),
	    ('FITC-0001', [5, 6, 7], 'ERa'),
	    ('FITC-0001', [8, 9, 10], 'pS6-240'),
	]
	for channel_cycle, marker_cols, marker in marker_rules:
	    rule_mask = (
	        (design['ChannelCycle'] == channel_cycle)
	        & design['Column'].isin(marker_cols)
	    )
	    markers.loc[rule_mask] = marker

	design['Marker'] = markers
	import glob
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt

	# Load the per-object measurement tables, parse the Label column into plate
	# coordinates, and draw a few exploratory plots.

	# Load all Results-Cyto-*.txt files into a single dataframe. Paths are sorted
	# so the row order does not depend on the order glob happens to return.
	result_paths = sorted(glob.glob('Results-Cyto-*.txt'))
	if not result_paths:
	    # pd.concat would raise an opaque "No objects to concatenate" here.
	    raise ValueError("no files match 'Results-Cyto-*.txt'")
	df = pd.concat([
	    pd.read_table(path, index_col=0)
	    for path in result_paths
	])

	# Parse the Label column into its components, in a new dataframe. The pattern
	# captures: a capital letter, two digits, the digits after "_fld", two
	# dash-separated numbers, and everything after the final colon.
	label_info = df.Label.str.extract(r'([A-Z])(\d\d)_fld(\d+):(\d+)-(\d+):(.*)',
	                                  expand=True)
	label_info.columns = ['Row', 'Column', 'Field', 'ObjNum', 'Unknown', 'Channel']
	# Build Well while Row and Column are still strings (e.g. 'B' + '06').
	label_info['Well'] = label_info['Row'] + label_info['Column']
	# Convert the row letter to a 1-based number (A -> 1, B -> 2, ...).
	label_info['Row'] = (label_info.Row.map(ord) - ord('A') + 1)
	# The loop body is nested explicitly (this copy had lost its indentation).
	for field_name in 'Row', 'Column', 'Field', 'ObjNum', 'Unknown':
	    label_info[field_name] = label_info[field_name].astype(int)
	# Append these new columns to the data.
	dfl = pd.concat([label_info, df], axis=1)

	# Extract the metrics from the DAPI channel in cycle 1, from well B06.
	well = dfl[(dfl['Well'] == 'B06') & (dfl['Channel'] == 'DAPI-0001')]
	# Extract field 1 only.
	well_field = well[well['Field'] == 1]

	# Plot X vs Y for field 1, visualizing the individual cell positions.
	well_field.plot.scatter(x='X', y='Y')
	# Plot X vs Y for all fields. x/y/data are passed by keyword because current
	# seaborn releases no longer accept them positionally.
	sns.lmplot(x='X', y='Y', data=well, col='Field', col_wrap=4, fit_reg=False)
	# Plot fitted ellipse major vs minor axes, with a linear fit and confidence
	# interval.
	sns.lmplot(x='Major', y='Minor', data=well, col='Field', col_wrap=4)

	# Extract the "Mean" metric from all channels for all fields in well B06, and
	# pivot the result so that each channel becomes its own column.
	wmean = pd.pivot_table(dfl[dfl['Well'] == 'B06'],
	                       values='Mean', index=['Field', 'ObjNum'],
	                       columns='Channel')
	# Show scatter plots for all pairwise channel combinations.
	sns.pairplot(wmean, diag_kind='kde',
	             plot_kws={'edgecolor': 'none', 's': 1, 'alpha': .3})
	plt.gcf().subplots_adjust(bottom=0.05, left=0.05)

	# Same as above, except for three wells, each in a different color.
	wm3 = pd.pivot_table(
	    dfl[dfl['Well'].isin(['B02', 'E06', 'G10'])],
	    values='Mean', index=['Well', 'Field', 'ObjNum'], columns='Channel'
	).reset_index('Well')
	# NOTE(review): diag_kws below are matplotlib hist() options; newer seaborn
	# releases may need an explicit diag_kind and different keywords -- confirm
	# against the installed version.
	sns.pairplot(wm3, hue='Well',
	             plot_kws={'edgecolor': 'none', 's': 1, 'alpha': .3},
	             diag_kws={'histtype': 'step', 'bins': 30})
	plt.gcf().subplots_adjust(bottom=0.05, left=0.05)

	plt.show()