Skip to content

Instantly share code, notes, and snippets.

@alastairparagas
Created April 27, 2019 18:13
Show Gist options
  • Select an option

  • Save alastairparagas/851655c0d9d88f6e1144be5cbd4c4ef7 to your computer and use it in GitHub Desktop.

Select an option

Save alastairparagas/851655c0d9d88f6e1144be5cbd4c4ef7 to your computer and use it in GitHub Desktop.
Testing the test of doom
from multiprocessing import Pool, Array, Manager
import functools
import ctypes
import numpy as np
import pandas
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import matplotlib
import matplotlib.pyplot as plt
def get_df_set(csv_path="./results.csv", trim_rows=900, n_channels=125):
    """
    Prep dataframe from a given CSV file

    Reads the CSV, discards the first and last ``trim_rows`` rows of every
    (subject_id, cohort) recording, and splits what is left per subject.

    Parameters:
        csv_path: path of the CSV file. It must contain the columns
            ``subject_id``, ``cohort``, ``sleep_stage`` and
            ``eeg_0`` .. ``eeg_{n_channels - 1}``.
        trim_rows: number of rows removed from each end of a recording
            (the default of 900 is what the original code described as
            "15 minutes" of data).
        n_channels: number of ``eeg_<i>`` feature columns to select.

    Returns:
        A ``(groups_df, shared_matrix)`` tuple. ``groups_df`` is a list with
        one ``(features, sleep_stage)`` pair per subject that still has rows
        after trimming. ``shared_matrix`` is a zero-initialised
        ``len(groups_df) x len(groups_df)`` float64 numpy view backed by a
        ``multiprocessing.Array``.
    """
    eeg_data = pandas.read_csv(csv_path)
    print('Read CSV into Pandas')

    # Position of every row counted from the start and from the end of its
    # recording. Filtering on these positions (instead of dropping
    # head()/tail() index labels) cannot raise KeyError when a recording has
    # fewer than 2 * trim_rows rows: such a recording simply ends up empty.
    recordings = eeg_data.groupby(by=["subject_id", "cohort"])
    rows_before = recordings.cumcount()
    rows_after = recordings.cumcount(ascending=False)
    trimmed = eeg_data[(rows_before >= trim_rows) & (rows_after >= trim_rows)]

    # Keep each subject's rows ordered by cohort, then by file order, which
    # is the order the former groupby(...).apply(...) produced.
    trimmed = trimmed.sort_values(["subject_id", "cohort"], kind="mergesort")

    feature_columns = ["eeg_{0}".format(i) for i in range(n_channels)]
    groups_df = [
        (subject_rows[feature_columns], subject_rows['sleep_stage'])
        for _, subject_rows in trimmed.groupby('subject_id')
    ]

    # Square score matrix (one row/column per subject) allocated in
    # multiprocessing shared memory and exposed as a numpy array.
    row_count = len(groups_df)
    shared_matrix_base = Array(ctypes.c_double, row_count * row_count)
    shared_matrix = np.ctypeslib.as_array(
        shared_matrix_base.get_obj()
    ).reshape((row_count, row_count))
    return groups_df, shared_matrix
def scoring_metric(tuplet, score_matrix, lock, estimator=None):
    """
    Scoring metric used

    Trains the estimator on one subject and scores it against every subject.
    The score for a test group is ``mean(1 - 0.2 * |y_test - prediction|)``,
    i.e. every unit of absolute label error costs 0.2.

    Parameters:
        tuplet: ``((i, groups), (x_train, y_train))`` where ``i`` is the index
            of the training subject, ``groups`` is the list of all
            ``(x, y)`` pairs and ``(x_train, y_train)`` is the training pair.
        score_matrix: 2-D indexable; row ``i`` is filled with the scores.
        lock: lock held while the estimator is being fitted.
        estimator: object with ``fit``/``predict``. When omitted, the
            module-level ``model`` global is used, as the original code did.

    Returns:
        1-D numpy array holding the score of every group, in ``groups`` order.
    """
    (i, groups), (x_train, y_train) = tuplet
    if estimator is None:
        # Backward-compatible fallback: `model` must exist as a module-level
        # name in the process executing this function.
        estimator = model

    # `with` guarantees the lock is released even if fit() raises.
    with lock:
        estimator.fit(x_train, y_train)
    print('Trained on subject {0}'.format(i))

    group_score_vector = np.empty(len(groups))
    for j, (x_test, y_test) in enumerate(groups):
        score = np.mean(
            1 - (np.absolute(y_test - estimator.predict(x_test)) * 0.2)
        )
        group_score_vector[j] = score
        score_matrix[i][j] = score
    # Previously an undefined name was returned here (NameError).
    return group_score_vector
def _set_worker_model(estimator):
    """Pool initializer: expose the estimator as the module-level ``model`` of a worker."""
    global model
    model = estimator


def score_model(model, groups, score_matrix, lock=None):
    """
    Run the scoring metric in parallel and any other
    pre/post-scoring functionality

    Parameters:
        model: estimator handed to every worker process; it is installed as
            the module-level ``model`` name that ``scoring_metric`` reads.
        groups: list of ``(x, y)`` pairs, one per subject.
        score_matrix: matrix forwarded to ``scoring_metric``.
            NOTE(review): it reaches the workers as a pickled argument, so
            worker-side writes are presumably not visible in the parent;
            rely on the returned array instead. Confirm if sharing matters.
        lock: optional lock forwarded to ``scoring_metric``. When omitted, a
            ``Manager`` lock is created for the duration of this call.

    Returns:
        numpy array built from the per-subject results of ``scoring_metric``
        (one entry per training subject, in ``groups`` order).
    """
    # One task per training subject; every task also carries all groups so
    # the worker can score against each of them.
    tasks = [
        ((index, groups), train_pair)
        for index, train_pair in enumerate(groups)
    ]

    # Create the manager lazily: a `Manager().Lock()` default argument would
    # start a manager process as soon as this module is imported.
    manager = None
    if lock is None:
        manager = Manager()
        lock = manager.Lock()
    try:
        worker = functools.partial(
            scoring_metric, score_matrix=score_matrix, lock=lock
        )
        # The initializer makes `model` available in every worker regardless
        # of the process start method; the `with` block releases the pool.
        with Pool(
            2, initializer=_set_worker_model, initargs=(model,)
        ) as pool_executor:
            print('Running parallel scoring')
            rows = pool_executor.map(worker, tasks, 1)
    finally:
        if manager is not None:
            manager.shutdown()
    return np.array(rows)
def plotit(accuracy_score_matrix, img_file_path="accuracy_scores.png"):
    """
    Plot results

    Draws the score matrix as an annotated heat map and saves it as an image.

    Parameters:
        accuracy_score_matrix: 2-D array-like of scores; row ``i`` / column
            ``j`` are labelled ``subject_i`` / ``subject_j``.
        img_file_path: path of the image file to write.
    """
    print('Plotting results')
    scores = np.asarray(accuracy_score_matrix)
    # Take the size from the data instead of assuming exactly 20 subjects.
    n_rows, n_cols = scores.shape

    fig, ax = plt.subplots()
    fig.set_figheight(20)
    fig.set_figwidth(20)
    ax.imshow(scores)

    ax.set_xticks(np.arange(n_cols))
    ax.set_yticks(np.arange(n_rows))
    ax.set_xticklabels(["subject_{0}".format(j) for j in range(n_cols)])
    ax.set_yticklabels(["subject_{0}".format(i) for i in range(n_rows)])
    plt.setp(
        ax.get_xticklabels(), rotation=45, ha="right",
        rotation_mode="anchor"
    )

    # Loop over data dimensions and create text annotations.
    for i in range(n_rows):
        for j in range(n_cols):
            value = scores[i, j]
            ax.text(
                j, i, round(value, 3),
                ha="center", va="center",
                # Black text for scores above 0.8, white text otherwise.
                color='black' if value > 0.8 else 'white'
            )

    ax.set_title("Accuracy scores")
    fig.tight_layout()
    fig.savefig(
        img_file_path,
        bbox_inches='tight',
        pad_inches=0.25
    )
if __name__ == '__main__':
    # Script entry point: load the data, score a random forest across all
    # subjects in parallel, then plot the resulting score matrix.
    subject_groups, shared_scores = get_df_set()
    # Keep the name `model`: scoring_metric looks it up as a module global.
    model = RandomForestClassifier(n_estimators=64)
    accuracy_scores = score_model(model, subject_groups, shared_scores)
    plotit(accuracy_scores)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment