santiago-salas-v · May 1, 2017 03:05
diff --git a/nir_analysis.py b/nir_analysis.py
 import pandas as pd
 import numpy as np
 import scipy.stats as stats
 import colormaps
 import PySide
 import pyqtgraph as pg
 from pyqtgraph.Qt import QtGui, QtCore
 from pyqtgraph import PlotWidget

 # The QApplication has to exist before any widget is constructed.
 app = QtGui.QApplication([])

 dpi_res = 250
 # Window size: two fifths of the screen in each direction. resize() takes
 # integer pixel counts, so truncate here instead of passing floats.
 window_size = QtGui.QDesktopWidget().screenGeometry()
 app_width = int(window_size.width() * 2 / 5.0)
 app_height = int(window_size.height() * 2 / 5.0)
 wind = pg.GraphicsWindow(title='NIR analysis')
 # NOTE(review): p is not used again in the visible code - confirm that this
 # extra widget parented to the window is intended.
 p = PlotWidget(wind)

 wind.resize(app_width, app_height)
 # Candidate series colors: three colormaps concatenated into one list.
 colormap_colors = (
     colormaps.viridis.colors +
     colormaps.magma.colors +
     colormaps.inferno.colors
 )
 # Keep only entries whose RGB vector has a Euclidean norm above 0.25
 # (presumably to drop near-black colors - assumes 0..1 components).
 brilliant_colors_criteria = np.linalg.norm(
     np.array(colormap_colors), axis=1
 ) > 0.25
 colormap_colors = [
     rgb for rgb, keep in zip(colormap_colors, brilliant_colors_criteria)
     if keep
 ]
 # Marker symbols that are handed out to the series at random.
 symbols = ['t', 't1', 't2', 't3', 's', 'p', 'h', 'star', '+', 'd']

 # Extract information from csv

 # noinspection PyArgumentList
 filename, _ = QtGui.QFileDialog.getOpenFileName(
     caption='Open csv file', filter='CSV files (*.csv)'
 )  # Format from PySide: return tuple (filename, filter)
 if not filename:
     # The dialog was cancelled: stop here instead of handing an empty
     # path to read_csv.
     raise SystemExit('No csv file selected')

 # Lines 0-9 and line 11 of the file are skipped, which leaves line 10 as
 # the header row. list(range(...)) keeps the concatenation valid on both
 # Python 2 and Python 3.
 df = pd.read_csv(
     filename,
     skiprows=list(range(0, 10)) + [11])

 # Strip every blank from the column names and from the sample ids. List
 # comprehensions are used because map() is lazy on Python 3.
 df.columns = [name.replace(' ', '') for name in df.columns]
 df['sample_id'] = [value.replace(' ', '') for value in df['sample_id']]
 # Wavelengths are encoded in the column names as 'spectrum_<number>'.
 raw_wavelengths = [
     float(name.replace('spectrum_', ''))
     for name in df.columns
     if name.startswith('spectrum_')
 ]

 # Group by 'type' when the file has such a column, otherwise by sample id.
 identifier = 'type' if 'type' in df.keys() else 'sample_id'
 df[identifier] = df[identifier].map(lambda x: x.lstrip(' '))
 sample_names = df[identifier].unique()
 df_groups = df.groupby(df[identifier])


 def c_int(input_data, confidence=0.95):
     """Return the half-width of the confidence interval of the mean.

     The half-width is the standard error of the mean times the two-sided
     Student-t critical value with ``n - 1`` degrees of freedom, so the
     interval itself is ``mean - h`` to ``mean + h``.

     :param input_data: sequence of sample values.
     :param confidence: two-sided confidence level, 0.95 by default.
     :return: the half-width ``h``.
     """
     x = np.asarray(input_data)
     se = stats.sem(x)
     return se * stats.t.ppf((1 + confidence) / 2.0, len(x) - 1)

 # Build a second frame holding d(ln(spectrum)) / d(wavelength) per row
 spectrum_columns = [
     name for name in df.columns if name.startswith('spectrum_')
 ]
 df_deriv = pd.DataFrame(
     columns=[identifier] + raw_wavelengths,
     index=df.index
 )
 # Every column except the identifier is labelled with a wavelength.
 wavelength_columns = [
     label for label in df_deriv.columns if label != identifier
 ]
 df_deriv[identifier] = df[identifier]
 df_deriv[wavelength_columns] = df[spectrum_columns]
 df_deriv[wavelength_columns] = df_deriv[wavelength_columns].apply(
     # row.index holds the wavelength labels of the row being processed
     lambda row: np.gradient(np.log(row), 1) / np.gradient(row.index, 1),
     axis=1  # axis of every row
 )

 # Per-group mean and confidence half-width of the raw spectra
 all_labels = slice(None)
 means_c_ints = df_groups.agg([np.mean, c_int])[spectrum_columns]
 means_slice = means_c_ints.loc[all_labels, (all_labels, 'mean')]
 c_int_slice = means_c_ints.loc[all_labels, (all_labels, 'c_int')]

 # Same statistics for the log-derivative frame
 deriv_means_c_ints = df_deriv.groupby(identifier).agg(
     [np.mean, c_int]
 )[wavelength_columns]
 deriv_means_slice = deriv_means_c_ints.loc[
     all_labels, (all_labels, 'mean')
 ]
 deriv_c_int_slice = deriv_means_c_ints.loc[
     all_labels, (all_labels, 'c_int')
 ]

 # Plotting
 # Give every series a random palette color, redrawing until the colors are
 # far enough apart or the attempt budget is used up.
 minimum_color_stdev = 200
 series_names = list(means_slice.index)
 # One draw suffices for a single series (its spread score is always 0, so
 # the threshold can never be met) and none is needed without any series.
 max_color_attempts = 10000 if len(series_names) > 1 else len(series_names)
 color_stdev = 0.0
 best_color_stdev = -1.0
 series_colors = dict()
 for _ in range(max_color_attempts):
     # Palette entries are scaled by 255 before being handed to the plots.
     candidate_colors = dict(
         (name, tuple(
             255 * elem for elem in
             colormap_colors[np.random.randint(0, len(colormap_colors))]
         ))
         for name in series_names
     )
     colors_matrix = np.array(list(candidate_colors.values()))
     # Spread score: std over the RGB axis of every pairwise color
     # difference, Euclidean norm of each row, smallest row norm wins.
     pairwise_std = np.std(colors_matrix[:, None, :] - colors_matrix, axis=2)
     color_stdev = np.linalg.norm(pairwise_std, axis=1).min()
     # Remember the best palette so the loop can stop after a bounded number
     # of draws even when the threshold is never reached.
     if color_stdev > best_color_stdev:
         best_color_stdev = color_stdev
         series_colors = candidate_colors
     if color_stdev >= minimum_color_stdev:
         break
 # Keep the score of the palette that is actually used.
 color_stdev = max(best_color_stdev, 0.0)

 print(series_colors)

 # One random marker symbol per series.
 series_symbols = dict(
     (name, symbols[np.random.randint(0, len(symbols))])
     for name in series_names
 )

 # First panel: mean raw spectrum of every series, with a legend.
 p1 = wind.addPlot(row=1, col=1)
 p1.addLegend()
 for series in means_slice.index:
     # Series whose name contains 'di water' are left out of the plot.
     if 'di water' in series:
         continue
     data = means_slice.loc[series].values
     plot_series = data
     color = series_colors[series]
     symbol = series_symbols[series]
     p1.plot(raw_wavelengths, plot_series,
             pen=color,
             name=series, symbol=symbol, symbolBrush=color,
             symbolPen=None, symbolSize=5)

 # Second panel: derivative of the mean spectrum with respect to wavelength,
 # plus the derivatives of the mean +/- confidence half-width curves.
 p2 = wind.addPlot(row=2, col=1)
 for series in means_slice.index:
     # Series whose name contains 'di water' are left out of the plot.
     if 'di water' in series:
         continue
     data = means_slice.loc[series].values
     c_int_data = c_int_slice.loc[series].values
     usl_data = data + c_int_data
     lsl_data = data - c_int_data
     # The wavelength spacing is shared by the three curves of a series.
     wavelength_steps = np.gradient(raw_wavelengths, 1)
     color = series_colors[series]
     symbol = series_symbols[series]
     # noinspection PyUnresolvedReferences
     plot_series = np.gradient(data, 1) / wavelength_steps
     p2.plot(raw_wavelengths, plot_series,
             pen=color,
             name=series, symbol=symbol, symbolBrush=color,
             symbolPen=None, symbolSize=5)
     # ``style`` and ``width`` are pen properties, not plot() keywords, so
     # the dashed look has to be carried by the pen object itself.
     limit_pen = pg.mkPen(color, style=QtCore.Qt.DashLine, width=3)
     for limit_data in (usl_data, lsl_data):
         # noinspection PyUnresolvedReferences
         limit_series = np.gradient(limit_data, 1) / wavelength_steps
         p2.plot(raw_wavelengths, limit_series,
                 pen=limit_pen, name=series, symbol=None)

 # Third panel: standardized mean of the log-derivative frame, plus the
 # standardized mean -/+ confidence half-width curves.
 p3 = wind.addPlot(row=3, col=1)
 for series in deriv_means_slice.index:
     # Series whose name contains 'di water' are left out of the plot.
     if 'di water' in series:
         continue
     data = deriv_means_slice.loc[series].values
     c_int_data = deriv_c_int_slice.loc[series].values
     usl_data = data + c_int_data
     lsl_data = data - c_int_data
     color = series_colors[series]
     symbol = series_symbols[series]
     plot_series = (data - np.mean(data)) / np.std(data)
     p3.plot(raw_wavelengths, plot_series,
             pen=color,
             name=series, symbol=symbol, symbolBrush=color,
             symbolPen=None, symbolSize=5)
     # ``style`` and ``width`` are pen properties, not plot() keywords, so
     # the dashed look has to be carried by the pen object itself.
     limit_pen = pg.mkPen(color, style=QtCore.Qt.DashLine, width=3)
     # NOTE(review): each curve is standardized with its own mean and std,
     # so the limit curves need not bracket the mean curve - confirm intent.
     for limit_data in (lsl_data, usl_data):
         limit_series = (limit_data - np.mean(limit_data)) / \
             np.std(limit_data)
         p3.plot(raw_wavelengths, limit_series,
                 pen=limit_pen, name=series, symbol=None)

 # Axis titles: every label shares the same color and font size.
 labelStyle = {'color': '#FFF', 'font-size': '25pt'}
 axis_titles = (
     (p3, 'bottom', 'Wavelength, nm'),
     (p1, 'top', 'Raw data (Ref.)'),
     (p2, 'top', "1st derivative, with 95%CI's"),
     (p3, 'top', "1st derivative of log, normalized, with 95%CI's"),
 )
 for plot_item, axis_name, title in axis_titles:
     plot_item.setLabel(axis_name, title, **labelStyle)
 # Only the first panel has a legend; draw it at twice its default scale.
 p1.legend.setScale(2.0)

 # for item in wind.items():
 #     if isinstance(
 #             item,
 #             QtGui.QGraphicsTextItem):  # and item.textWidth() > 0:
 #         item.setFont(QtGui.QFont('Monospaced', 12))
 #         print item.toHtml()

 wind.show()

 # Start Qt event loop unless running in interactive mode.
 if __name__ == '__main__':
     import sys

     # The loop is skipped only when Python runs interactively and QtCore
     # exposes PYQT_VERSION.
     interactive_pyqt = (
         sys.flags.interactive == 1 and hasattr(QtCore, 'PYQT_VERSION')
     )
     if not interactive_pyqt:
         # noinspection PyArgumentList
         QtGui.QApplication.instance().exec_()
	import pandas as pd
	import numpy as np
	import scipy.stats as stats
	import colormaps
	import PySide
	import pyqtgraph as pg
	from pyqtgraph.Qt import QtGui, QtCore
	from pyqtgraph import PlotWidget

	# The QApplication has to exist before any widget is constructed.
	app = QtGui.QApplication([])

	dpi_res = 250
	# Window size: two fifths of the screen in each direction. resize() takes
	# integer pixel counts, so truncate here instead of passing floats.
	window_size = QtGui.QDesktopWidget().screenGeometry()
	app_width = int(window_size.width() * 2 / 5.0)
	app_height = int(window_size.height() * 2 / 5.0)
	wind = pg.GraphicsWindow(title='NIR analysis')
	# NOTE(review): p is not used again in the visible code - confirm that this
	# extra widget parented to the window is intended.
	p = PlotWidget(wind)

	wind.resize(app_width, app_height)
	# Candidate series colors: three colormaps concatenated into one list.
	colormap_colors = (
	    colormaps.viridis.colors +
	    colormaps.magma.colors +
	    colormaps.inferno.colors
	)
	# Keep only entries whose RGB vector has a Euclidean norm above 0.25
	# (presumably to drop near-black colors - assumes 0..1 components).
	brilliant_colors_criteria = np.linalg.norm(
	    np.array(colormap_colors), axis=1
	) > 0.25
	colormap_colors = [
	    rgb for rgb, keep in zip(colormap_colors, brilliant_colors_criteria)
	    if keep
	]
	# Marker symbols that are handed out to the series at random.
	symbols = ['t', 't1', 't2', 't3', 's', 'p', 'h', 'star', '+', 'd']

	# Extract information from csv

	# noinspection PyArgumentList
	filename, _ = QtGui.QFileDialog.getOpenFileName(
	    caption='Open csv file', filter='CSV files (*.csv)'
	)  # Format from PySide: return tuple (filename, filter)
	if not filename:
	    # The dialog was cancelled: stop here instead of handing an empty
	    # path to read_csv.
	    raise SystemExit('No csv file selected')

	# Lines 0-9 and line 11 of the file are skipped, which leaves line 10 as
	# the header row. list(range(...)) keeps the concatenation valid on both
	# Python 2 and Python 3.
	df = pd.read_csv(
	    filename,
	    skiprows=list(range(0, 10)) + [11])

	# Strip every blank from the column names and from the sample ids. List
	# comprehensions are used because map() is lazy on Python 3.
	df.columns = [name.replace(' ', '') for name in df.columns]
	df['sample_id'] = [value.replace(' ', '') for value in df['sample_id']]
	# Wavelengths are encoded in the column names as 'spectrum_<number>'.
	raw_wavelengths = [
	    float(name.replace('spectrum_', ''))
	    for name in df.columns
	    if name.startswith('spectrum_')
	]

	# Group by 'type' when the file has such a column, otherwise by sample id.
	identifier = 'type' if 'type' in df.keys() else 'sample_id'
	df[identifier] = df[identifier].map(lambda x: x.lstrip(' '))
	sample_names = df[identifier].unique()
	df_groups = df.groupby(df[identifier])


	def c_int(input_data, confidence=0.95):
	    """Return the half-width of the confidence interval of the mean.

	    The half-width is the standard error of the mean times the two-sided
	    Student-t critical value with ``n - 1`` degrees of freedom, so the
	    interval itself is ``mean - h`` to ``mean + h``.

	    :param input_data: sequence of sample values.
	    :param confidence: two-sided confidence level, 0.95 by default.
	    :return: the half-width ``h``.
	    """
	    x = np.asarray(input_data)
	    se = stats.sem(x)
	    return se * stats.t.ppf((1 + confidence) / 2.0, len(x) - 1)

	# Build a second frame holding d(ln(spectrum)) / d(wavelength) per row
	spectrum_columns = [
	    name for name in df.columns if name.startswith('spectrum_')
	]
	df_deriv = pd.DataFrame(
	    columns=[identifier] + raw_wavelengths,
	    index=df.index
	)
	# Every column except the identifier is labelled with a wavelength.
	wavelength_columns = [
	    label for label in df_deriv.columns if label != identifier
	]
	df_deriv[identifier] = df[identifier]
	df_deriv[wavelength_columns] = df[spectrum_columns]
	df_deriv[wavelength_columns] = df_deriv[wavelength_columns].apply(
	    # row.index holds the wavelength labels of the row being processed
	    lambda row: np.gradient(np.log(row), 1) / np.gradient(row.index, 1),
	    axis=1  # axis of every row
	)

	# Per-group mean and confidence half-width of the raw spectra
	all_labels = slice(None)
	means_c_ints = df_groups.agg([np.mean, c_int])[spectrum_columns]
	means_slice = means_c_ints.loc[all_labels, (all_labels, 'mean')]
	c_int_slice = means_c_ints.loc[all_labels, (all_labels, 'c_int')]

	# Same statistics for the log-derivative frame
	deriv_means_c_ints = df_deriv.groupby(identifier).agg(
	    [np.mean, c_int]
	)[wavelength_columns]
	deriv_means_slice = deriv_means_c_ints.loc[
	    all_labels, (all_labels, 'mean')
	]
	deriv_c_int_slice = deriv_means_c_ints.loc[
	    all_labels, (all_labels, 'c_int')
	]

	# Plotting
	# Give every series a random palette color, redrawing until the colors are
	# far enough apart or the attempt budget is used up.
	minimum_color_stdev = 200
	series_names = list(means_slice.index)
	# One draw suffices for a single series (its spread score is always 0, so
	# the threshold can never be met) and none is needed without any series.
	max_color_attempts = 10000 if len(series_names) > 1 else len(series_names)
	color_stdev = 0.0
	best_color_stdev = -1.0
	series_colors = dict()
	for _ in range(max_color_attempts):
	    # Palette entries are scaled by 255 before being handed to the plots.
	    candidate_colors = dict(
	        (name, tuple(
	            255 * elem for elem in
	            colormap_colors[np.random.randint(0, len(colormap_colors))]
	        ))
	        for name in series_names
	    )
	    colors_matrix = np.array(list(candidate_colors.values()))
	    # Spread score: std over the RGB axis of every pairwise color
	    # difference, Euclidean norm of each row, smallest row norm wins.
	    pairwise_std = np.std(colors_matrix[:, None, :] - colors_matrix, axis=2)
	    color_stdev = np.linalg.norm(pairwise_std, axis=1).min()
	    # Remember the best palette so the loop can stop after a bounded number
	    # of draws even when the threshold is never reached.
	    if color_stdev > best_color_stdev:
	        best_color_stdev = color_stdev
	        series_colors = candidate_colors
	    if color_stdev >= minimum_color_stdev:
	        break
	# Keep the score of the palette that is actually used.
	color_stdev = max(best_color_stdev, 0.0)

	print(series_colors)

	# One random marker symbol per series.
	series_symbols = dict(
	    (name, symbols[np.random.randint(0, len(symbols))])
	    for name in series_names
	)

	# First panel: mean raw spectrum of every series, with a legend.
	p1 = wind.addPlot(row=1, col=1)
	p1.addLegend()
	for series in means_slice.index:
	    # Series whose name contains 'di water' are left out of the plot.
	    if 'di water' in series:
	        continue
	    data = means_slice.loc[series].values
	    plot_series = data
	    color = series_colors[series]
	    symbol = series_symbols[series]
	    p1.plot(raw_wavelengths, plot_series,
	            pen=color,
	            name=series, symbol=symbol, symbolBrush=color,
	            symbolPen=None, symbolSize=5)

	# Second panel: derivative of the mean spectrum with respect to wavelength,
	# plus the derivatives of the mean +/- confidence half-width curves.
	p2 = wind.addPlot(row=2, col=1)
	for series in means_slice.index:
	    # Series whose name contains 'di water' are left out of the plot.
	    if 'di water' in series:
	        continue
	    data = means_slice.loc[series].values
	    c_int_data = c_int_slice.loc[series].values
	    usl_data = data + c_int_data
	    lsl_data = data - c_int_data
	    # The wavelength spacing is shared by the three curves of a series.
	    wavelength_steps = np.gradient(raw_wavelengths, 1)
	    color = series_colors[series]
	    symbol = series_symbols[series]
	    # noinspection PyUnresolvedReferences
	    plot_series = np.gradient(data, 1) / wavelength_steps
	    p2.plot(raw_wavelengths, plot_series,
	            pen=color,
	            name=series, symbol=symbol, symbolBrush=color,
	            symbolPen=None, symbolSize=5)
	    # ``style`` and ``width`` are pen properties, not plot() keywords, so
	    # the dashed look has to be carried by the pen object itself.
	    limit_pen = pg.mkPen(color, style=QtCore.Qt.DashLine, width=3)
	    for limit_data in (usl_data, lsl_data):
	        # noinspection PyUnresolvedReferences
	        limit_series = np.gradient(limit_data, 1) / wavelength_steps
	        p2.plot(raw_wavelengths, limit_series,
	                pen=limit_pen, name=series, symbol=None)

	# Third panel: standardized mean of the log-derivative frame, plus the
	# standardized mean -/+ confidence half-width curves.
	p3 = wind.addPlot(row=3, col=1)
	for series in deriv_means_slice.index:
	    # Series whose name contains 'di water' are left out of the plot.
	    if 'di water' in series:
	        continue
	    data = deriv_means_slice.loc[series].values
	    c_int_data = deriv_c_int_slice.loc[series].values
	    usl_data = data + c_int_data
	    lsl_data = data - c_int_data
	    color = series_colors[series]
	    symbol = series_symbols[series]
	    plot_series = (data - np.mean(data)) / np.std(data)
	    p3.plot(raw_wavelengths, plot_series,
	            pen=color,
	            name=series, symbol=symbol, symbolBrush=color,
	            symbolPen=None, symbolSize=5)
	    # ``style`` and ``width`` are pen properties, not plot() keywords, so
	    # the dashed look has to be carried by the pen object itself.
	    limit_pen = pg.mkPen(color, style=QtCore.Qt.DashLine, width=3)
	    # NOTE(review): each curve is standardized with its own mean and std,
	    # so the limit curves need not bracket the mean curve - confirm intent.
	    for limit_data in (lsl_data, usl_data):
	        limit_series = (limit_data - np.mean(limit_data)) / \
	            np.std(limit_data)
	        p3.plot(raw_wavelengths, limit_series,
	                pen=limit_pen, name=series, symbol=None)

	# Axis titles: every label shares the same color and font size.
	labelStyle = {'color': '#FFF', 'font-size': '25pt'}
	axis_titles = (
	    (p3, 'bottom', 'Wavelength, nm'),
	    (p1, 'top', 'Raw data (Ref.)'),
	    (p2, 'top', "1st derivative, with 95%CI's"),
	    (p3, 'top', "1st derivative of log, normalized, with 95%CI's"),
	)
	for plot_item, axis_name, title in axis_titles:
	    plot_item.setLabel(axis_name, title, **labelStyle)
	# Only the first panel has a legend; draw it at twice its default scale.
	p1.legend.setScale(2.0)

	# for item in wind.items():
	# if isinstance(
	# item,
	# QtGui.QGraphicsTextItem): # and item.textWidth() > 0:
	# item.setFont(QtGui.QFont('Monospaced', 12))
	# print item.toHtml()

	wind.show()

	# Start Qt event loop unless running in interactive mode.
	if __name__ == '__main__':
	    import sys

	    # The loop is skipped only when Python runs interactively and QtCore
	    # exposes PYQT_VERSION.
	    if (sys.flags.interactive != 1) or \
	            not hasattr(QtCore, 'PYQT_VERSION'):
	        # noinspection PyArgumentList
	        QtGui.QApplication.instance().exec_()