Skip to content

Instantly share code, notes, and snippets.

View Olshansk's full-sized avatar
🦉

Daniel Olshansky Olshansk

🦉
View GitHub Profile
@Olshansk
Olshansk / joint_probability_matricies_5.py
Last active May 25, 2020 23:25
Joint Probability Matrix - Counts To Percentages
# Convert the joint-probability matrix from raw counts to percentages.
# Bin centers label both axes; drop the last linspace edge to get one
# label per bin.
bin_edges = np.linspace(5, 105, NUM_BINS + 1)
axis = bin_edges[:-1]

jp_df = pd.DataFrame(jp_matrix, index=axis, columns=axis)


def _count_to_pct(val):
    # NaN cells mean "no students in this (row, col) pair" -> probability 0.
    return 0 if math.isnan(val) else val / NUM_STUDENTS


jp_df = jp_df.applymap(_count_to_pct)
jp_df
@Olshansk
Olshansk / joint_probability_matricies_4.py
Last active May 25, 2020 23:22
Joint Probability Matrices - Create Transition Matrix Function
# NOTE(review): this gist preview is truncated — the body of
# create_joint_probability_matrix continues past the visible lines, and the
# scrape has stripped the original indentation, so the nesting below appears
# flattened. Tokens are preserved exactly; comments only.
def create_joint_probability_matrix(data_GT, data_P, bins):
# https://stackoverflow.com/questions/38931566
# Styling helper: builds a colors.Normalize over the dataframe's value range
# (presumably used with df.style.apply to shade cells — not visible here).
def background_gradient(s, m=None, M=None, cmap='Reds', low=0, high=0):
# Default the color range to the data's own min/max when not supplied.
if m is None:
m = s.min().min()
if M is None:
M = s.max().max()
rng = M - m
# Widen the normalization window by `low`/`high` fractions of the span.
norm = colors.Normalize(m - (rng * low), M + (rng * high))
@Olshansk
Olshansk / joint_probability_matricies_3.py
Last active May 25, 2020 23:21
Joint Probability Matrices - Regression Analysis
# Regression analysis: set up a two-panel figure, then report sklearn
# regression metrics comparing ground-truth vs. predicted grades.
fig, ax = plt.subplots(2, 1, figsize=(20, 20))
sns.set(color_codes=True)

mse = round(mean_squared_error(grades_GT, grades_P), 2)
print("mean_squared_error: ", mse)

mae = round(mean_absolute_error(grades_GT, grades_P), 2)
print("mean_absolute_error: ", mae)

evs = round(explained_variance_score(grades_GT, grades_P), 2)
print("explained_variance_score: ", evs)

ax[0].tick_params(axis='both', labelsize=25)
@Olshansk
Olshansk / transition_matricies_3.py
Created May 24, 2020 19:55
Transition Matrices - Regression Analysis
# Regression analysis: figure setup plus the standard sklearn regression
# metrics, printed in a fixed order.
fig, ax = plt.subplots(2, 1, figsize=(20, 20))
sns.set(color_codes=True)

for label, metric in (
    ("r2_score: ", r2_score),
    ("mean_squared_error: ", mean_squared_error),
    ("mean_absolute_error: ", mean_absolute_error),
    ("explained_variance_score: ", explained_variance_score),
):
    print(label, round(metric(grades_GT, grades_P), 2))
@Olshansk
Olshansk / joint_probability_matricies_3.py
Last active May 25, 2020 23:24
Joint Probability Matrices - Group and Merge the Data
# Build per-student dataframes; reset_index() turns the implicit student
# number into an explicit 'index' column we can join on.
df_GT = pd.DataFrame({'bucket': bucket_GT}).reset_index()
display(df_GT.head())

# NOTE(review): 'cut_P' — other snippets in this series name this 'bucket_P';
# confirm which name the surrounding notebook actually defines.
df_P = pd.DataFrame({'bucket': cut_P}).reset_index()
display(df_P.head())

# Join ground-truth and predicted buckets on the student number.
# NOTE(review): suffix '_grouth_truth' is a typo for '_ground_truth', but it
# becomes part of the merged column names, so it is preserved byte-for-byte.
merged_df = df_GT.merge(df_P, on=['index'], suffixes=('_grouth_truth', '_predicted'))
display(merged_df.head())
@Olshansk
Olshansk / joint_probability_matricies_2.py
Last active May 25, 2020 23:16
Transition Matrices - Bucketize Data
# Bucketize grades into NUM_BINS equal-width bins spanning [0, 100].
NUM_BINS = 10
bins = np.linspace(0, 100, NUM_BINS + 1)


def _bucketize(grades):
    # include_lowest=True closes the first interval on the left so that a
    # grade of exactly 0 still lands in the first bucket.
    return pd.cut(grades, bins=bins, include_lowest=True, right=True)


bucket_GT = _bucketize(grades_GT)
bucket_P = _bucketize(grades_P)

# Preview which bucket each ground-truth grade fell into.
pd.DataFrame({'grades': grades_GT, 'bucket': bucket_GT}).head()
@Olshansk
Olshansk / joint_probability_matricies_1.py
Last active May 25, 2020 23:08
Joint Probability Matrices - Data Generation
# Data Generation
# Constants for the synthetic grade dataset.
NUM_STUDENTS = 30  # number of simulated students
MEAN = 80  # mean grade (presumably passed to get_truncated_normal below — confirm)
STD = 20  # standard deviation of the grade distribution
# https://stackoverflow.com/questions/36894191/how-to-get-a-normal-distribution-within-a-range-in-numpy
def get_truncated_normal(mean, sd, size, low, upp, random_state=None):
    """Draw `size` samples from a normal(mean, sd) truncated to [low, upp].

    Used to cap generated values (e.g. grades) to a valid range.
    scipy's truncnorm takes its bounds in standard-deviation units relative
    to the untruncated distribution, hence the (bound - mean) / sd rescaling.

    Args:
        mean: mean of the underlying normal distribution.
        sd: standard deviation of the underlying normal distribution.
        size: number of samples to draw.
        low: lower truncation bound.
        upp: upper truncation bound.
        random_state: optional seed / Generator for reproducible draws.
            New parameter; defaults to None, so existing callers keep the
            original (unseeded) behavior.

    Returns:
        numpy array of `size` samples, all within [low, upp].
    """
    a = (low - mean) / sd
    b = (upp - mean) / sd
    return truncnorm(a, b, loc=mean, scale=sd).rvs(size, random_state=random_state)
@Olshansk
Olshansk / get_stats_profile_example_print_stats.py
Created February 17, 2020 14:43
An example of how to profile a simple python script using print_stats
import cProfile, pstats
import time
from random import randint
def sleep1():
    """Block for 0.1 s — stand-in workload for the cProfile demo."""
    time.sleep(0.1)


def sleep2():
    """Block for 0.2 s — a second, distinguishable workload."""
    time.sleep(0.2)
@Olshansk
Olshansk / get_stats_profile_visualization.py
Last active February 17, 2020 03:16
Visualize the results of all the aggregated stats profiles
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
# Bar geometry and x-axis labels for the aggregated-profile chart.
WIDTH = 0.4  # bar width
ind = np.arange(len(time_sliced_counters))
# Each entry is a (timestamp, counter) pair; keep just the timestamps.
# (loop variable renamed from `time` so it no longer shadows the time module)
x_axis = tuple(ts for ts, _counter in time_sliced_counters)
@Olshansk
Olshansk / get_stats_profile_log_aggregation.py
Last active February 17, 2020 19:20
A short script that aggregates all the timestamped & logged stats profiles
from collections import Counter
import itertools
TIME_SLICE = 10  # Aggregate logs every 10 seconds


def time_to_bucket(time, start_time=None):
    """Map an absolute timestamp to its TIME_SLICE-wide bucket index.

    `start_time` is a new optional parameter: it defaults to the module-level
    START_TIME (the original behavior), so existing callers are unchanged,
    while tests and other callers can supply an explicit origin.
    """
    if start_time is None:
        start_time = START_TIME
    return (time - start_time) // TIME_SLICE


def bucket_to_time(bucket, start_time=None):
    """Inverse of time_to_bucket: bucket index -> bucket start timestamp."""
    if start_time is None:
        start_time = START_TIME
    return bucket * TIME_SLICE + start_time