Last active
May 1, 2017 03:05
-
-
Save santiago-salas-v/4fa5e53c1c35898067d1d3fe3593ca0e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import scipy.stats as stats | |
import colormaps | |
import PySide | |
import pyqtgraph as pg | |
from pyqtgraph.Qt import QtGui, QtCore | |
from pyqtgraph import PlotWidget | |
# The QApplication is created first, before any other Qt object in the script.
app = QtGui.QApplication([])
dpi_res = 250  # in this script only referenced by commented-out legend scaling
# Size the window to two fifths of the geometry reported by QDesktopWidget.
window_size = QtGui.QDesktopWidget().screenGeometry()
# QWidget.resize() is declared with integer pixel arguments, so truncate the
# scaled values instead of handing it floats.
app_width = int(window_size.width() * 2 / 5.0)
app_height = int(window_size.height() * 2 / 5.0)
wind = pg.GraphicsWindow(title='NIR analysis')
p = PlotWidget(wind)
wind.resize(app_width, app_height)
# Pool of candidate line colours: the viridis, magma and inferno colour
# tables concatenated in that order.
all_palette_colors = (
    colormaps.viridis.colors
    + colormaps.magma.colors
    + colormaps.inferno.colors
)


def _rgb_length(rgb):
    # Euclidean length of one colour row.
    return np.sqrt(rgb.dot(rgb))


# Keep only the colours whose vector length exceeds 0.25.
brilliant_colors_criteria = np.apply_along_axis(
    _rgb_length, 1, np.array(all_palette_colors)
) > 0.25
colormap_colors = [
    rgb
    for rgb, is_brilliant in zip(all_palette_colors, brilliant_colors_criteria)
    if is_brilliant
]
# Marker symbol names handed to pyqtgraph when plotting each series.
symbols = ['t', 't1', 't2', 't3', 's', 'p', 'h', 'star', '+', 'd']
# Extract information from csv
# noinspection PyArgumentList
filename, _ = QtGui.QFileDialog.getOpenFileName(
    caption='Open csv file', filter='CSV files (*.csv)'
)  # Format from PySide: return tuple (filename, filter)
if not filename:
    # The dialog was dismissed without choosing a file; stop here instead of
    # letting read_csv fail on an empty path.
    raise SystemExit('No csv file selected.')
# Rows 0-9 and row 11 of the file are skipped. list(range(...)) keeps the
# concatenation valid on Python 3, where range() is no longer a list.
df = pd.read_csv(filename, skiprows=list(range(0, 10)) + [11])
# Remove every space from the column labels and from the sample ids. List
# comprehensions are used because map() returns a lazy iterator on Python 3.
df.columns = [label.replace(' ', '') for label in df.columns]
df['sample_id'] = [value.replace(' ', '') for value in df['sample_id']]
# The numeric suffix of each 'spectrum_<number>' column is its wavelength.
raw_wavelengths = [
    float(label.replace('spectrum_', ''))
    for label in df.columns
    if label.startswith('spectrum_')
]
# Group by the 'type' column when the file has one, otherwise by 'sample_id'.
identifier = 'type' if 'type' in df.keys() else 'sample_id'
df[identifier] = df[identifier].map(lambda x: x.lstrip(' '))
sample_names = df[identifier].unique()
df_groups = df.groupby(df[identifier])
def c_int(input_data, confidence=0.95):
    """Return the half-width of the Student-t confidence interval of the mean.

    :param input_data: sequence of observations.
    :param confidence: two-sided confidence level (0.95 by default).
    :return: ``h`` such that the interval is ``mean - h`` to ``mean + h``;
        computed as the standard error of the mean times the t quantile at
        ``(1 + confidence) / 2`` with ``n - 1`` degrees of freedom.
    """
    x = np.array(input_data)
    n = len(x)
    # Only the standard error is needed; the mean itself is not part of the
    # half-width, so it is not computed here.
    se = stats.sem(x)
    h = se * stats.t.ppf((1 + confidence) / 2.0, n - 1)
    return h
# Calculate derivative of log in new df
df_deriv = pd.DataFrame(
    index=df.index,
    columns=[identifier] + raw_wavelengths,
)
df_deriv[identifier] = df[identifier]

# Column selections that are reused several times below.
spectrum_columns = [
    label for label in df.columns if label.startswith('spectrum_')
]
deriv_columns = [
    label for label in df_deriv.columns if label != identifier
]

# Copy the spectra into the wavelength-labelled columns.
df_deriv[deriv_columns] = df[spectrum_columns]


def _log_derivative(row):
    # row.index in this case is the pd.Series index holding the wavelengths
    return np.gradient(np.log(row), 1) / np.gradient(row.index, 1)


df_deriv[deriv_columns] = df_deriv[deriv_columns].apply(
    _log_derivative,
    axis=1  # axis of every row
)

# Per-group mean and confidence half-width of the raw spectra; the column
# index of the result has the statistic name ('mean' / 'c_int') as its
# second level.
means_c_ints = df_groups.agg([np.mean, c_int])[spectrum_columns]
means_slice = means_c_ints.loc[slice(None), (slice(None), 'mean')]
c_int_slice = means_c_ints.loc[slice(None), (slice(None), 'c_int')]

# Same aggregation for the log-derivative spectra.
deriv_means_c_ints = df_deriv.groupby(identifier).agg(
    [np.mean, c_int]
)[deriv_columns]
deriv_means_slice = deriv_means_c_ints.loc[
    slice(None), (slice(None), 'mean')
]
deriv_c_int_slice = deriv_means_c_ints.loc[
    slice(None), (slice(None), 'c_int')
]
# Plotting
# Draw a random palette colour (scaled to 0-255) for every series and redraw
# until the spread measure below reaches minimum_color_stdev.
minimum_color_stdev = 200
# Upper bound on redraws: with very few series the threshold may never be
# reached (with a single series the measure is always 0), so the search is
# capped and the best candidate seen is used instead of looping forever.
max_color_attempts = 10000
color_stdev = 0.0
series_colors = dict()
best_color_stdev = None
best_series_colors = series_colors
color_attempts = 0
while color_stdev < minimum_color_stdev and \
        color_attempts < max_color_attempts:
    color_attempts += 1
    series_colors = dict(zip(means_slice.index, [
        tuple(
            [
                255 * elem for elem in
                colormap_colors[np.random.randint(
                    0, len(colormap_colors), 1
                ).item()]]) for item in means_slice.index
    ]))
    colors_matrix = np.array([
        series_colors[key] for key in series_colors
    ])
    # For every series: vector length of the per-channel standard deviations
    # of its colour differences to all series; the smallest value is the
    # spread measure of this candidate.
    # color_stdev = np.std(colors_matrix[:, None, :] - colors_matrix)
    color_stdev = min(
        np.apply_along_axis(
            lambda x: np.sqrt(x.dot(x)),
            1,
            np.std(colors_matrix[:, None, :] - colors_matrix, axis=2)
        )
    )
    if best_color_stdev is None or color_stdev > best_color_stdev:
        best_color_stdev = color_stdev
        best_series_colors = series_colors
if color_stdev < minimum_color_stdev:
    # Attempt budget exhausted: fall back to the most spread-out candidate.
    series_colors = best_series_colors
    color_stdev = best_color_stdev
print(series_colors)
# override randomization (too slow)
# series_colors = {
#     'aeos': (
#         248.41488, 136.87890000000002, 12.33996), 'prod (fga)': (
#         60.05913, 78.929385, 138.45072), 'blank (ceramic tile)': (
#         238.65552, 229.13535, 27.573405), 'ao': (
#         196.23678, 60.199635, 78.408675), 'di water': (
#         34.104465, 139.876425, 141.152955), 'sxs': (
#         165.050535, 218.89200000000002, 53.514555), 's water': (
#         62.613465000000005, 15.13476, 114.35118)}
# Pick one marker symbol at random for every series.
series_symbols = {
    series_name: symbols[np.random.randint(0, len(symbols), 1).item()]
    for series_name in means_slice.index
}
# First panel: group means of the raw spectra, with a legend.
p1 = wind.addPlot(row=1, col=1)
p1.addLegend()
for series in means_slice.index:
    # Series whose name contains 'di water' are left out of the plot.
    # if 'di water' not in series and 's water' not in series and 'ceramic'
    # not in series:
    if 'di water' in series:
        continue
    # The raw means are plotted as they are; no normalization is applied.
    plot_series = means_slice.loc[series].values
    color = series_colors[series]
    symbol = series_symbols[series]
    p1.plot(raw_wavelengths, plot_series,
            pen=color,
            name=series, symbol=symbol, symbolBrush=color,
            symbolPen=None, symbolSize=5)
# Second panel: derivative of the group means with respect to wavelength,
# together with the derivatives of mean + c_int and mean - c_int.
p2 = wind.addPlot(row=2, col=1)
# p2.addLegend()
for series in means_slice.index:
    # Series whose name contains 'di water' are left out of the plot.
    if 'di water' in series:
        continue
    data = means_slice.loc[series].values
    c_int_data = c_int_slice.loc[series].values
    usl_data = data + c_int_data
    lsl_data = data - c_int_data
    # The wavelength spacing is shared by the three curves of a series.
    # noinspection PyUnresolvedReferences
    wavelength_steps = np.gradient(raw_wavelengths, 1)
    # noinspection PyUnresolvedReferences
    plot_series = np.gradient(data, 1) / wavelength_steps
    # noinspection PyUnresolvedReferences
    usl_plot_series = np.gradient(usl_data, 1) / wavelength_steps
    # noinspection PyUnresolvedReferences
    lsl_plot_series = np.gradient(lsl_data, 1) / wavelength_steps
    color = series_colors[series]
    symbol = series_symbols[series]
    p2.plot(raw_wavelengths, plot_series,
            pen=color,
            name=series, symbol=symbol, symbolBrush=color,
            symbolPen=None, symbolSize=5)
    # plot() does not take 'style' or 'width' keywords (they are dropped
    # without effect), so the dashed 3-wide line has to be described by the
    # pen itself.
    bound_pen = pg.mkPen(color, style=QtCore.Qt.DashLine, width=3)
    for bound_series in (usl_plot_series, lsl_plot_series):
        p2.plot(raw_wavelengths, bound_series,
                pen=bound_pen,
                name=series,
                symbol=None)
# Third panel: group means of the log-derivative spectra, standardised per
# series, with the mean + c_int and mean - c_int curves.
p3 = wind.addPlot(row=3, col=1)
# p3.addLegend()
for series in deriv_means_slice.index:
    # Series whose name contains 'di water' are left out of the plot.
    if 'di water' in series:  # or 'ceramic' in series:
        continue
    data = deriv_means_slice.loc[series].values
    c_int_data = deriv_c_int_slice.loc[series].values
    usl_data = data + c_int_data
    lsl_data = data - c_int_data
    # Standardise all three curves with the mean and standard deviation of
    # the central curve. Standardising each bound with its own statistics
    # would shift and rescale it independently, so the band would no longer
    # lie around the mean curve.
    center = np.mean(data)
    spread = np.std(data)
    plot_series = (data - center) / spread
    usl_plot_series = (usl_data - center) / spread
    lsl_plot_series = (lsl_data - center) / spread
    color = series_colors[series]
    symbol = series_symbols[series]
    p3.plot(raw_wavelengths, plot_series,
            pen=color,
            name=series, symbol=symbol, symbolBrush=color,
            symbolPen=None, symbolSize=5)
    # plot() does not take 'style' or 'width' keywords (they are dropped
    # without effect), so the dashed 3-wide line has to be described by the
    # pen itself.
    bound_pen = pg.mkPen(color, style=QtCore.Qt.DashLine, width=3)
    for bound_series in (lsl_plot_series, usl_plot_series):
        p3.plot(raw_wavelengths, bound_series,
                pen=bound_pen,
                name=series,
                symbol=None)
# Axis titles: one wavelength label at the bottom of the last panel and a
# heading above each panel, all with the same text style.
# p1.legend.setLayoutDirection(QtCore.Qt.LayoutDirection.LeftToRight)
# p2.legend.setScale(dpi_res/100.0)
# p3.legend.setScale(dpi_res/100.0)
labelStyle = {'color': '#FFF', 'font-size': '25pt'}
axis_titles = (
    (p3, 'bottom', 'Wavelength, nm'),
    (p1, 'top', 'Raw data (Ref.)'),
    (p2, 'top', "1st derivative, with 95%CI's"),
    (p3, 'top', "1st derivative of log, normalized, with 95%CI's"),
)
for plot_item, axis_name, title in axis_titles:
    plot_item.setLabel(axis_name, title, **labelStyle)
# p1.legend.setStyle(**labelStyle)
# p1.legend.setFixedHeight(20)
# p1.legend.setMaximumHeight(20)
# Enlarge the legend of the first panel.
p1.legend.setScale(2.0)
# for item in wind.items():
#     if isinstance(
#             item,
#             QtGui.QGraphicsTextItem):  # and item.textWidth() > 0:
#         item.setFont(QtGui.QFont('Monospaced', 12))
#         print item.toHtml()
wind.show()
# Start Qt event loop unless running in interactive mode.
if __name__ == '__main__':
    import sys
    interactive_session = sys.flags.interactive == 1
    has_pyqt_version = hasattr(QtCore, 'PYQT_VERSION')
    # The loop is skipped only when the interpreter is interactive AND
    # QtCore exposes PYQT_VERSION.
    if not (interactive_session and has_pyqt_version):
        # noinspection PyArgumentList
        QtGui.QApplication.instance().exec_()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment