Skip to content

Instantly share code, notes, and snippets.

View Olshansk's full-sized avatar
🦉

Daniel Olshansky Olshansk

🦉
View GitHub Profile
@Olshansk
Olshansk / joint_probability_matricies_5.py
Last active May 25, 2020 23:25
Joint Probability Matrix - Counts To Percentages
# Convert the joint-probability matrix from raw counts to percentages.
# Bin centers label both axes; drop the last linspace edge to get one
# label per bin.
bin_edges = np.linspace(5, 105, NUM_BINS + 1)
axis = bin_edges[:-1]

jp_df = pd.DataFrame(jp_matrix, index=axis, columns=axis)


def _count_to_pct(val):
    # NaN cells mean "no students in this (row, col) pair" -> probability 0.
    return 0 if math.isnan(val) else val / NUM_STUDENTS


jp_df = jp_df.applymap(_count_to_pct)
jp_df
@Olshansk
Olshansk / joint_probability_matricies_4.py
Last active May 25, 2020 23:22
Joint Probability Matrices - Create Transition Matrix Function
# NOTE(review): this gist preview is truncated — the body of
# create_joint_probability_matrix continues past the visible lines, and the
# scrape has stripped the original indentation, so the nesting below appears
# flattened. Tokens are preserved exactly; comments only.
def create_joint_probability_matrix(data_GT, data_P, bins):
# https://stackoverflow.com/questions/38931566
# Styling helper: builds a colors.Normalize over the dataframe's value range
# (presumably used with df.style.apply to shade cells — not visible here).
def background_gradient(s, m=None, M=None, cmap='Reds', low=0, high=0):
# Default the color range to the data's own min/max when not supplied.
if m is None:
m = s.min().min()
if M is None:
M = s.max().max()
rng = M - m
# Widen the normalization window by `low`/`high` fractions of the span.
norm = colors.Normalize(m - (rng * low), M + (rng * high))
@Olshansk
Olshansk / joint_probability_matricies_3.py
Last active May 25, 2020 23:21
Joint Probability Matrices - Regression Analysis
# Regression analysis: set up a two-panel figure, then report sklearn
# regression metrics comparing ground-truth vs. predicted grades.
fig, ax = plt.subplots(2, 1, figsize=(20, 20))
sns.set(color_codes=True)

mse = round(mean_squared_error(grades_GT, grades_P), 2)
print("mean_squared_error: ", mse)

mae = round(mean_absolute_error(grades_GT, grades_P), 2)
print("mean_absolute_error: ", mae)

evs = round(explained_variance_score(grades_GT, grades_P), 2)
print("explained_variance_score: ", evs)

ax[0].tick_params(axis='both', labelsize=25)
@Olshansk
Olshansk / transition_matricies_3.py
Created May 24, 2020 19:55
Transition Matrices - Regression Analysis
# Regression analysis: figure setup plus the standard sklearn regression
# metrics, printed in a fixed order.
fig, ax = plt.subplots(2, 1, figsize=(20, 20))
sns.set(color_codes=True)

for label, metric in (
    ("r2_score: ", r2_score),
    ("mean_squared_error: ", mean_squared_error),
    ("mean_absolute_error: ", mean_absolute_error),
    ("explained_variance_score: ", explained_variance_score),
):
    print(label, round(metric(grades_GT, grades_P), 2))
@Olshansk
Olshansk / joint_probability_matricies_3.py
Last active May 25, 2020 23:24
Joint Probability Matrices - Group and Merge the Data
# Build per-student dataframes; reset_index() turns the implicit student
# number into an explicit 'index' column we can join on.
df_GT = pd.DataFrame({'bucket': bucket_GT}).reset_index()
display(df_GT.head())

# NOTE(review): 'cut_P' — other snippets in this series name this 'bucket_P';
# confirm which name the surrounding notebook actually defines.
df_P = pd.DataFrame({'bucket': cut_P}).reset_index()
display(df_P.head())

# Join ground-truth and predicted buckets on the student number.
# NOTE(review): suffix '_grouth_truth' is a typo for '_ground_truth', but it
# becomes part of the merged column names, so it is preserved byte-for-byte.
merged_df = df_GT.merge(df_P, on=['index'], suffixes=('_grouth_truth', '_predicted'))
display(merged_df.head())
@Olshansk
Olshansk / joint_probability_matricies_2.py
Last active May 25, 2020 23:16
Transition Matrices - Bucketize Data
# Bucketize grades into NUM_BINS equal-width bins spanning [0, 100].
NUM_BINS = 10
bins = np.linspace(0, 100, NUM_BINS + 1)


def _bucketize(grades):
    # include_lowest=True closes the first interval on the left so that a
    # grade of exactly 0 still lands in the first bucket.
    return pd.cut(grades, bins=bins, include_lowest=True, right=True)


bucket_GT = _bucketize(grades_GT)
bucket_P = _bucketize(grades_P)

# Preview which bucket each ground-truth grade fell into.
pd.DataFrame({'grades': grades_GT, 'bucket': bucket_GT}).head()
@Olshansk
Olshansk / joint_probability_matricies_1.py
Last active May 25, 2020 23:08
Joint Probability Matrices - Data Generation
# Data Generation
# Constants for the synthetic grade dataset.
NUM_STUDENTS = 30  # number of simulated students
MEAN = 80  # mean grade (presumably passed to get_truncated_normal below — confirm)
STD = 20  # standard deviation of the grade distribution
# https://stackoverflow.com/questions/36894191/how-to-get-a-normal-distribution-within-a-range-in-numpy
def get_truncated_normal(mean, sd, size, low, upp, random_state=None):
    """Draw `size` samples from a normal(mean, sd) truncated to [low, upp].

    Used to cap generated values (e.g. grades) to a valid range.
    scipy's truncnorm takes its bounds in standard-deviation units relative
    to the untruncated distribution, hence the (bound - mean) / sd rescaling.

    Args:
        mean: mean of the underlying normal distribution.
        sd: standard deviation of the underlying normal distribution.
        size: number of samples to draw.
        low: lower truncation bound.
        upp: upper truncation bound.
        random_state: optional seed / Generator for reproducible draws.
            New parameter; defaults to None, so existing callers keep the
            original (unseeded) behavior.

    Returns:
        numpy array of `size` samples, all within [low, upp].
    """
    a = (low - mean) / sd
    b = (upp - mean) / sd
    return truncnorm(a, b, loc=mean, scale=sd).rvs(size, random_state=random_state)
@Olshansk
Olshansk / get_stats_profile_example_print_stats.py
Created February 17, 2020 14:43
An example of how to profile a simple python script using print_stats
import cProfile, pstats
import time
from random import randint
def sleep1():
    """Block for 0.1 s — stand-in workload for the cProfile demo."""
    time.sleep(0.1)


def sleep2():
    """Block for 0.2 s — a second, distinguishable workload."""
    time.sleep(0.2)
@Olshansk
Olshansk / get_stats_profile_visualization.py
Last active February 17, 2020 03:16
Visualize the results of all the aggregated stats profiles
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
# Bar geometry and x-axis labels for the aggregated-profile chart.
WIDTH = 0.4  # bar width
ind = np.arange(len(time_sliced_counters))
# Each entry is a (timestamp, counter) pair; keep just the timestamps.
# (loop variable renamed from `time` so it no longer shadows the time module)
x_axis = tuple(ts for ts, _counter in time_sliced_counters)
@Olshansk
Olshansk / get_stats_profile_log_aggregation.py
Last active February 17, 2020 19:20
A short script that aggregates all the timestamped & logged stats profiles
from collections import Counter
import itertools
TIME_SLICE = 10  # Aggregate logs every 10 seconds


def time_to_bucket(time, start_time=None):
    """Map an absolute timestamp to its TIME_SLICE-wide bucket index.

    `start_time` is a new optional parameter: it defaults to the module-level
    START_TIME (the original behavior), so existing callers are unchanged,
    while tests and other callers can supply an explicit origin.
    """
    if start_time is None:
        start_time = START_TIME
    return (time - start_time) // TIME_SLICE


def bucket_to_time(bucket, start_time=None):
    """Inverse of time_to_bucket: bucket index -> bucket start timestamp."""
    if start_time is None:
        start_time = START_TIME
    return bucket * TIME_SLICE + start_time