Skip to content

Instantly share code, notes, and snippets.

@santiago-salas-v
Last active May 1, 2017 03:05
Show Gist options
  • Save santiago-salas-v/4fa5e53c1c35898067d1d3fe3593ca0e to your computer and use it in GitHub Desktop.
Save santiago-salas-v/4fa5e53c1c35898067d1d3fe3593ca0e to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import scipy.stats as stats
import colormaps
import PySide
import pyqtgraph as pg
from pyqtgraph.Qt import QtGui, QtCore
from pyqtgraph import PlotWidget
# --- Qt application, main window and colour pool ---------------------------
# The QApplication is created before any widget is instantiated.
app = QtGui.QApplication([])
# NOTE(review): dpi_res is not read by any active statement visible in this
# script; it is kept so that the module-level name stays available.
dpi_res = 250
# Size the window to two fifths of the screen in each direction.
window_size = QtGui.QDesktopWidget().screenGeometry()
app_width = window_size.width() * 2 / 5.0
app_height = window_size.height() * 2 / 5.0
wind = pg.GraphicsWindow(title='NIR analysis')
p = PlotWidget(wind)
# QWidget.resize() takes integer pixel sizes; the values above are floats
# because of the division by 5.0, so truncate them explicitly.
wind.resize(int(app_width), int(app_height))
# Pool of candidate colours: the three colormaps concatenated.
# presumably each entry is an RGB triple in the 0-1 range (the plotting code
# multiplies by 255) -- TODO confirm against the colormaps module.
colormap_colors = (
    colormaps.viridis.colors +
    colormaps.magma.colors +
    colormaps.inferno.colors
)
# Keep only colours whose Euclidean norm exceeds 0.25, i.e. drop the darkest
# entries of the pool.
brilliant_colors_criteria = np.sqrt(
    (np.array(colormap_colors) ** 2).sum(axis=1)
) > 0.25
colormap_colors = [
    item for item, keep in zip(colormap_colors, brilliant_colors_criteria)
    if keep
]
# Marker symbols that are later assigned at random to the plotted series.
symbols = ['t', 't1', 't2', 't3', 's', 'p', 'h', 'star', '+', 'd']
# Extract information from csv
# noinspection PyArgumentList
filename, _ = QtGui.QFileDialog.getOpenFileName(
    caption='Open csv file', filter='CSV files (*.csv)'
)  # Format from PySide: return tuple (filename, filter)
if not filename:
    # An empty name is what the dialog yields when it is cancelled (per the
    # Qt QFileDialog documentation); stop here instead of letting read_csv
    # fail on an empty path.
    raise SystemExit('No csv file selected.')
# Line 10 of the file becomes the header row: lines 0-9 before it and
# line 11 right after it are skipped.  list(range(...)) keeps the
# concatenation valid on Python 3 as well as Python 2.
df = pd.read_csv(
    filename,
    skiprows=list(range(0, 10)) + [11])
# Remove every blank from the column names and from the sample ids.  List
# comprehensions are used instead of map() so that a real list (not a lazy
# iterator) is assigned on Python 3 too.
df.columns = [name.replace(' ', '') for name in df.columns]
df['sample_id'] = [value.replace(' ', '') for value in df['sample_id']]
# Columns named 'spectrum_<number>' hold the spectra; the numeric suffix is
# used as the x value of the plots.
raw_wavelengths = [
    float(item.replace('spectrum_', ''))
    for item in df.columns
    if item.startswith('spectrum_')
]
# Group rows by the 'type' column when the file has one, otherwise by
# 'sample_id'.  Leading blanks are stripped from the grouping labels.
identifier = 'type' if 'type' in df.keys() else 'sample_id'
df[identifier] = df[identifier].map(lambda x: x.lstrip(' '))
sample_names = df[identifier].unique()
df_groups = df.groupby(df[identifier])
def c_int(input_data, confidence=0.95):
    """Return the half-width of the confidence interval of the mean.

    The half-width is the standard error of the mean multiplied by the
    two-sided Student-t quantile with ``n - 1`` degrees of freedom, so the
    interval itself is ``mean - h`` to ``mean + h``.

    :param input_data: sequence of sample values (anything ``np.array``
        accepts).
    :param confidence: two-sided confidence level, 0.95 by default.
    :return: the half-width ``h``; NaN when fewer than two samples are
        given, because the standard error is undefined in that case.
    """
    x = np.array(input_data)
    n = len(x)
    if n < 2:
        # sem() and t.ppf() both degenerate to NaN here (with runtime
        # warnings); return the same value directly and quietly.
        return np.nan
    return stats.sem(x) * stats.t.ppf((1 + confidence) / 2.0, n - 1)
# Calculate derivative of log in new df
# df_deriv has one identifier column followed by one column per wavelength;
# each row holds d(log(spectrum)) / d(wavelength) of the matching row of df.
spectrum_columns = [
    item for item in df.columns if item.startswith('spectrum_')
]
# Finite-difference spacing of the wavelength axis (the same for every row).
wavelength_steps = np.gradient(np.array(raw_wavelengths), 1)
log_spectra = np.log(df[spectrum_columns].values.astype(float))
# The gradient is taken along axis 1 (across wavelengths) for all rows at
# once.  This replaces a row-wise DataFrame.apply whose lambda returned a
# bare ndarray; whether such results are expanded back into columns depends
# on the pandas version, so the computation is done in numpy instead.
df_deriv = pd.DataFrame(
    np.gradient(log_spectra, 1, axis=1) / wavelength_steps,
    columns=raw_wavelengths,
    index=df.index
)
df_deriv.insert(0, identifier, df[identifier])
# Per-group mean and confidence-interval half-width of the raw spectra.
# The spectrum columns are selected before aggregating so that np.mean and
# c_int are never applied to non-spectrum (e.g. text) columns.
means_c_ints = df_groups[spectrum_columns].agg([np.mean, c_int])
# The aggregated columns are a two-level index (column, statistic); split
# the two statistics into separate frames.
means_slice = means_c_ints.loc[(slice(None)), (slice(None), 'mean')]
c_int_slice = means_c_ints.loc[(slice(None)), (slice(None), 'c_int')]
# Same statistics for the log-derivative spectra.
deriv_means_c_ints = df_deriv.groupby(identifier)[
    [item for item in df_deriv.columns if item != identifier]
].agg([np.mean, c_int])
deriv_means_slice = deriv_means_c_ints.loc[
    (slice(None)), (slice(None), 'mean')]
deriv_c_int_slice = deriv_means_c_ints.loc[
    (slice(None)), (slice(None), 'c_int')]
# Plotting
# Pick one random colour per series from colormap_colors (scaled to 0-255)
# and redraw the whole assignment until the colours are spread far enough
# apart.  The spread score of an assignment is computed as follows: for every
# pair of series take the standard deviation of the RGB difference, take the
# Euclidean norm of those values over each row, and keep the smallest row.
minimum_color_stdev = 200
# Upper bound on the number of redraws.  Without it the search never ends
# when the threshold cannot be reached, e.g. with a single series, where the
# score is always 0.
max_color_attempts = 10000
color_stdev = 0.0
series_colors = dict()
series_keys = list(means_slice.index)
best_color_stdev = -1.0
attempts = 0
while series_keys and color_stdev < minimum_color_stdev \
        and attempts < max_color_attempts:
    attempts += 1
    candidate_colors = [
        tuple(
            255 * elem for elem in
            colormap_colors[np.random.randint(0, len(colormap_colors))]
        )
        for _ in series_keys
    ]
    colors_matrix = np.array(candidate_colors)
    pairwise_std = np.std(colors_matrix[:, None, :] - colors_matrix, axis=2)
    color_stdev = np.sqrt((pairwise_std ** 2).sum(axis=1)).min()
    if color_stdev > best_color_stdev:
        # Remember the best assignment so far; it is the one that is used
        # if the attempts run out before the threshold is met.
        best_color_stdev = color_stdev
        series_colors = dict(zip(series_keys, candidate_colors))
if series_keys:
    color_stdev = best_color_stdev
print(series_colors)
# To skip the randomization, assign series_colors directly, e.g.
# series_colors = {
#     'aeos': (248.41488, 136.87890000000002, 12.33996),
#     'prod (fga)': (60.05913, 78.929385, 138.45072),
#     'blank (ceramic tile)': (238.65552, 229.13535, 27.573405),
#     'ao': (196.23678, 60.199635, 78.408675),
#     'di water': (34.104465, 139.876425, 141.152955),
#     'sxs': (165.050535, 218.89200000000002, 53.514555),
#     's water': (62.613465000000005, 15.13476, 114.35118)}
# One random marker symbol per series (symbols may repeat).
series_symbols = dict(
    (item, symbols[np.random.randint(0, len(symbols))])
    for item in means_slice.index
)
# First row of the window: group means of the raw spectra, with a legend.
p1 = wind.addPlot(row=1, col=1)
p1.addLegend()
for series in means_slice.index:
    # Series whose name contains 'di water' are left out of the plot.
    if 'di water' in series:
        continue
    data = means_slice.loc[series].values
    # The raw means are plotted as they are.  (A normalised copy used to be
    # computed here and never used; it divided by max(abs(data)), which is
    # zero for an all-zero spectrum.)
    plot_series = data
    color = series_colors[series]
    symbol = series_symbols[series]
    p1.plot(raw_wavelengths, plot_series,
            pen=color,
            name=series, symbol=symbol, symbolBrush=color,
            symbolPen=None, symbolSize=5)
# Second row of the window: first derivative of the group means with respect
# to wavelength, together with the derivative of (mean + CI) and (mean - CI).
p2 = wind.addPlot(row=2, col=1)
# Spacing of the wavelength axis; identical for every series, so it is
# computed once outside the loop.
wavelength_spacing = np.gradient(raw_wavelengths, 1)
for series in means_slice.index:
    # Series whose name contains 'di water' are left out of the plot.
    if 'di water' in series:
        continue
    data = means_slice.loc[series].values
    c_int_data = c_int_slice.loc[series].values
    usl_data = data + c_int_data  # upper bound: mean + CI half-width
    lsl_data = data - c_int_data  # lower bound: mean - CI half-width
    # noinspection PyUnresolvedReferences
    plot_series = np.gradient(data, 1) / wavelength_spacing
    # noinspection PyUnresolvedReferences
    usl_plot_series = np.gradient(usl_data, 1) / wavelength_spacing
    # noinspection PyUnresolvedReferences
    lsl_plot_series = np.gradient(lsl_data, 1) / wavelength_spacing
    color = series_colors[series]
    symbol = series_symbols[series]
    p2.plot(raw_wavelengths, plot_series,
            pen=color,
            name=series, symbol=symbol, symbolBrush=color,
            symbolPen=None, symbolSize=5)
    # plot() has no 'style' or 'width' options of its own; a dashed, wider
    # line has to be described by the pen, so build it with mkPen.
    ci_pen = pg.mkPen(color=color, style=QtCore.Qt.DashLine, width=3)
    for bound_series in (usl_plot_series, lsl_plot_series):
        p2.plot(raw_wavelengths, bound_series,
                pen=ci_pen,
                name=series,
                symbol=None)
# Third row of the window: group means of the log-derivative spectra and
# their CI bounds, each curve standardised to zero mean and unit standard
# deviation before plotting.
p3 = wind.addPlot(row=3, col=1)
for series in deriv_means_slice.index:
    # Series whose name contains 'di water' are left out of the plot.
    if 'di water' in series:
        continue
    data = deriv_means_slice.loc[series].values
    c_int_data = deriv_c_int_slice.loc[series].values
    usl_data = data + c_int_data  # upper bound: mean + CI half-width
    lsl_data = data - c_int_data  # lower bound: mean - CI half-width
    # NOTE(review): the mean curve and both bounds are standardised with
    # their OWN mean and standard deviation, so after scaling the bounds are
    # not guaranteed to bracket the mean curve -- confirm this is intended.
    # noinspection PyUnresolvedReferences
    plot_series = (data - np.mean(data)) / np.std(data)
    # noinspection PyUnresolvedReferences
    usl_plot_series = (usl_data - np.mean(usl_data)) / np.std(usl_data)
    # noinspection PyUnresolvedReferences
    lsl_plot_series = (lsl_data - np.mean(lsl_data)) / np.std(lsl_data)
    color = series_colors[series]
    symbol = series_symbols[series]
    p3.plot(raw_wavelengths, plot_series,
            pen=color,
            name=series, symbol=symbol, symbolBrush=color,
            symbolPen=None, symbolSize=5)
    # plot() has no 'style' or 'width' options of its own; a dashed, wider
    # line has to be described by the pen, so build it with mkPen.
    ci_pen = pg.mkPen(color=color, style=QtCore.Qt.DashLine, width=3)
    for bound_series in (lsl_plot_series, usl_plot_series):
        p3.plot(raw_wavelengths, bound_series,
                pen=ci_pen,
                name=series,
                symbol=None)
# Axis titles: one shared text style, a wavelength label under the bottom
# plot and a descriptive title above each of the three plots.
labelStyle = {'color': '#FFF', 'font-size': '25pt'}
top_titles = (
    (p1, 'Raw data (Ref.)'),
    (p2, "1st derivative, with 95%CI's"),
    (p3, "1st derivative of log, normalized, with 95%CI's"),
)
p3.setLabel('bottom', 'Wavelength, nm', **labelStyle)
for plot_item, title in top_titles:
    plot_item.setLabel('top', title, **labelStyle)
# Draw the legend of the first plot at twice its default scale.
p1.legend.setScale(2.0)
wind.show()
# Start Qt event loop unless running in interactive mode.
if __name__ == '__main__':
    import sys
    interactive_session = sys.flags.interactive == 1
    pyqt_bindings = hasattr(QtCore, 'PYQT_VERSION')
    # The loop is skipped only when the interpreter is interactive AND the
    # Qt binding exposes PYQT_VERSION; in every other case it is started.
    if not (interactive_session and pyqt_bindings):
        # noinspection PyArgumentList
        QtGui.QApplication.instance().exec_()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment