amrakm’s gists

amrakm / Check if two numpy arrays are similar.py

Last active March 16, 2021 19:17

Check if two numpy arrays are similar - useful for confirming ML implementations

	## source: http://nbviewer.jupyter.org/github/rasbt/algorithms_in_ipython_notebooks/blob/master/ipython_nbs/statistics/linregr_least_squares_fit.ipynb#Sections

	import numpy as np

	# Raises AssertionError unless arr_1 and arr_2 agree to 5 decimal places
	# (per the NumPy docs the check is abs(desired - actual) < 1.5 * 10**-5).
	# NOTE(review): the NumPy docs recommend np.testing.assert_allclose for more
	# consistent floating-point comparisons — consider switching.
	np.testing.assert_almost_equal(arr_1, arr_2, decimal=5)

amrakm / Fit Line with Least Squared Error

Last active October 26, 2018 21:32

	# source: http://nbviewer.jupyter.org/github/rasbt/algorithms_in_ipython_notebooks/blob/master/ipython_nbs/statistics/linregr_least_squares_fit.ipynb#Sections
	# source_2: https://github.com/rasbt/data-science-tutorial/blob/master/code/linear-reqression-leastsquares.ipynb

	import numpy as np

	def matrix_lstsqr(x, y):
	    """Fit the line ``y = slope * x + intercept`` by ordinary least squares.

	    Args:
	        x: 1-D sequence of sample positions.
	        y: 1-D sequence of observed values, same length as ``x``.

	    Returns:
	        A length-2 array ``[slope, intercept]``.
	    """
	    # Design matrix: one column with the x values, one column of ones for
	    # the intercept term.
	    X = np.vstack([x, np.ones(len(x))]).T
	    # Solve with lstsq rather than forming inv(X.T @ X) explicitly: the normal
	    # equations square the condition number of X and fail outright when
	    # X.T @ X is singular.
	    weights, _residuals, _rank, _singular_values = np.linalg.lstsq(X, y, rcond=None)
	    return weights

amrakm / Normalised histogram

Created October 26, 2018 21:26

Normalised histogram that can be applied to Pandas plots directly

	def normalised_hist(x):
	    """Return a 10-bin histogram of ``x`` expressed as fractions of ``len(x)``.

	    Args:
	        x: pandas Series of numeric values; missing values are ignored when
	            binning.

	    Returns:
	        A Series of per-bin fractions indexed by ``(left_edge, right_edge)``
	        tuples, with the edges truncated to integers for labelling.

	    Raises:
	        ValueError: if ``x`` has no non-missing values (no finite range).
	    """
	    observed = x.dropna()
	    # Take the range from the NaN-free values: the builtin min()/max() on the
	    # raw series can return NaN when a missing value comes first, which makes
	    # np.histogram reject the range.
	    counts, edges = np.histogram(observed, range=(observed.min(), observed.max()))
	    # The denominator is the full length (missing rows included), so the
	    # fractions sum to the share of non-missing rows.
	    fractions = counts / float(x.shape[0])
	    # Integer edges are for readable labels only; non-integer edges are truncated.
	    edges = edges.astype(int)
	    return pd.Series(fractions, index=list(zip(edges[:-1], edges[1:])))

	# Bar-plot options: stacked bars, full-width bars, x labels rotated 45 degrees.
	kw = dict(stacked=True, width=1, rot=45)

	# Histogram the selected column for the rows where flag is True, then plot
	# the per-bin fractions as bars. (Fixed: stray ')' inside the column list.)
	df.groupby('flag').get_group(True)[['numerical_att']].apply(normalised_hist).unstack(0).plot.bar(**kw)

amrakm / Improve Pandas Memory Efficiency

Created October 27, 2018 11:02

Automatically downcast numerical columns to the smallest dtype that can hold their values

	## source: https://www.kaggle.com/jeru666/did-you-think-of-these-features

	def change_datatype(df):
	int_cols = list(df.select_dtypes(include=['int']).columns)
	for col in int_cols:
	if ((np.max(df[col]) <= 127) and(np.min(df[col] >= -128))):
	df[col] = df[col].astype(np.int8)
	elif ((np.max(df[col]) <= 32767) and(np.min(df[col] >= -32768))):
	df[col] = df[col].astype(np.int16)
	elif ((np.max(df[col]) <= 2147483647) and(np.min(df[col] >= -2147483648))):

amrakm / Bucket pandas continuous values

Created October 29, 2018 23:43

	# Bin edges for the price buckets: [0, 50], (50, 100], (100, 160].
	ranges = [0,50,100,160]
	ranges_label = ['cheap', "average", "expensive"]
	# pd.cut bins are right-closed and left-open by default, so a price of exactly 0
	# would fall outside every bin and become NaN; include_lowest=True closes the
	# first bin on the left.
	df['price_cat'] = pd.cut(df.current_package_price, ranges, labels=ranges_label, include_lowest=True)

	# Another example
	# Bucketing age groups

	# Bin edges for the age buckets; the last bin (70, 2020] is labelled 'unknown'.
	ranges = [18,25,29,34, 50,70, 2020]
	ranges_label = ['18-25','26-29',"30-34", "35-50", '51-70','unknown']
	# pd.cut bins are right-closed and left-open by default, so age 18 would be
	# excluded from the '18-25' bucket and become NaN; include_lowest=True closes
	# the first bin on the left so the label matches the data.
	df['age_group'] = pd.cut(df['age'], ranges, labels=ranges_label, include_lowest=True)

amrakm / Identify gender from first name.py

Last active March 5, 2021 12:20

#nlp #library

	## pip install gender-guesser

	import gender_guesser.detector as gender

	# Lazily-created detector shared by every get_gender() call.
	_DETECTOR = None


	def get_gender(x):
	    """Guess the gender associated with a first name.

	    Args:
	        x: first name as a string, in any letter case.

	    Returns:
	        Whatever ``gender.Detector.get_gender`` returns for the name after it
	        is normalised to "Capitalised" form.
	    """
	    global _DETECTOR
	    # Build the detector once and reuse it: constructing a new Detector for
	    # every name is wasteful when this is applied across a whole column.
	    if _DETECTOR is None:
	        _DETECTOR = gender.Detector()
	    # Normalise to one leading capital followed by lower-case letters.
	    return _DETECTOR.get_gender(x.lower().capitalize())

amrakm / gist:be4c89555ef5318aa6827905eb887d3f

Created January 15, 2019 14:30

Dynamically update plots in Jupyter lab

	# source: https://stackoverflow.com/a/52672859/5554394

	from IPython.display import clear_output
	from matplotlib import pyplot as plt
	import collections
	%matplotlib inline

	def live_plot(data_dict, figsize=(7,5), title=''):
	clear_output(wait=True)
	plt.figure(figsize=figsize)

amrakm / camel_case_to_snake_case.py

Created April 3, 2019 11:06 — forked from jaytaylor/camel_case_to_snake_case.py

Convert camel-case to snake-case in python.

	#!/usr/bin/env python

	"""
	Convert camel-case to snake-case in python.

	e.g.: CamelCase -> snake_case

	Relevant StackOverflow question: http://stackoverflow.com/a/1176023/293064
	"""

amrakm / Ignore warning in Jupyter Notebooks

Created June 25, 2019 13:52

	import warnings
	# Install a catch-all 'ignore' rule so no warnings are shown in the notebook.
	warnings.filterwarnings('ignore')
	# NOTE(review): simplefilter('ignore') appears to install the same catch-all
	# rule as the call above, so one of the two is likely redundant — confirm.
	warnings.simplefilter('ignore')

amrakm / Change matplotlib global config

Created June 25, 2019 16:19

	import matplotlib as mpl
	import matplotlib.pyplot as plt

	large = 22; med = 16; small = 12
	params = {'axes.titlesize': large,
	'legend.fontsize': med,
	'figure.figsize': (16, 10),
	'axes.labelsize': med,
	'axes.titlesize': med,
	'xtick.labelsize': med,

Amr Mashlah amrakm