This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## source: http://nbviewer.jupyter.org/github/rasbt/algorithms_in_ipython_notebooks/blob/master/ipython_nbs/statistics/linregr_least_squares_fit.ipynb#Sections
import numpy as np

# Check that two arrays agree element-wise to 5 decimal places; an
# AssertionError is raised on mismatch. ``arr_1`` and ``arr_2`` are not
# defined in this snippet and must be supplied by the surrounding session.
np.testing.assert_almost_equal(arr_1, arr_2, decimal=5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# source: http://nbviewer.jupyter.org/github/rasbt/algorithms_in_ipython_notebooks/blob/master/ipython_nbs/statistics/linregr_least_squares_fit.ipynb#Sections | |
# source_2: https://github.com/rasbt/data-science-tutorial/blob/master/code/linear-reqression-leastsquares.ipynb | |
import numpy as np | |
def matrix_lstsqr(x, y):
    """Fit the line ``y = slope * x + intercept`` by ordinary least squares.

    Args:
        x: 1-D sequence of predictor values.
        y: Sequence of responses with the same length as ``x``.

    Returns:
        numpy.ndarray holding ``[slope, intercept]``.
    """
    # Design matrix: first column is x, second column is all ones so the
    # second coefficient acts as the intercept.
    X = np.vstack([x, np.ones(len(x))]).T
    # Solve the least-squares problem directly instead of forming and
    # inverting X^T X: the explicit inverse squares the condition number and
    # raises on rank-deficient input (e.g. all x values identical), whereas
    # lstsq returns the minimum-norm solution in that case.
    coefficients, _, _, _ = np.linalg.lstsq(X, y, rcond=None)
    return coefficients
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def normalised_hist(x):
    """Return a 10-bin histogram of ``x`` expressed as fractions of its rows.

    Args:
        x: pandas Series of numeric values; missing values are ignored when
            binning.

    Returns:
        pandas Series of per-bin fractions indexed by ``(left, right)`` bin
        edges truncated to integers. Counts are divided by the total number
        of rows in ``x`` (missing values included), so the fractions sum to
        less than 1 when ``x`` contains missing values. An empty float Series
        is returned when ``x`` has no non-missing values.
    """
    values = x.dropna()
    if values.empty:
        # Nothing to bin; np.histogram would fail on an undefined range.
        return pd.Series(dtype=float)
    # Take the range from the cleaned values: min/max over the raw series can
    # yield NaN (e.g. when the first element is missing), which np.histogram
    # rejects as a non-finite range.
    counts, edges = np.histogram(values, range=(values.min(), values.max()))
    normalised_counts = counts / float(x.shape[0])
    # Integer-truncated edges are used purely as labels; for narrow ranges
    # neighbouring labels may coincide.
    edges = edges.astype(int)
    # Materialise the pairs so the index is built from a concrete list of
    # tuples rather than a one-shot iterator.
    return pd.Series(normalised_counts, index=list(zip(edges[:-1], edges[1:])))
# Bar-plot options: stacked bars, full-width bars, x tick labels rotated 45 degrees.
kw = dict(stacked=True, width=1, rot=45)
# Take the rows whose 'flag' is True, histogram the selected column with
# normalised_hist, move the first index level into columns and draw a bar chart.
# (The original line had a stray ')' inside the column selector, which is a
# syntax error.)
flagged = df.groupby('flag').get_group(True)
flagged[['numerical_att']].apply(normalised_hist).unstack(0).plot.bar(**kw)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## source: https://www.kaggle.com/jeru666/did-you-think-of-these-features | |
def change_datatype(df): | |
int_cols = list(df.select_dtypes(include=['int']).columns) | |
for col in int_cols: | |
if ((np.max(df[col]) <= 127) and(np.min(df[col] >= -128))): | |
df[col] = df[col].astype(np.int8) | |
elif ((np.max(df[col]) <= 32767) and(np.min(df[col] >= -32768))): | |
df[col] = df[col].astype(np.int16) | |
elif ((np.max(df[col]) <= 2147483647) and(np.min(df[col] >= -2147483648))): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bucket package prices into three labelled categories using the bin edges
# 0, 50, 100 and 160.
ranges = [0, 50, 100, 160]
ranges_label = ['cheap', "average", "expensive"]
# pd.cut builds right-closed intervals by default, which would leave a price
# of exactly 0 unassigned (NaN); include_lowest=True makes the first interval
# closed on the left as well.
df['price_cat'] = pd.cut(
    df.current_package_price, ranges, labels=ranges_label, include_lowest=True
)
# Another example
# Bucketing age groups
ranges = [18, 25, 29, 34, 50, 70, 2020]
ranges_label = ['18-25', '26-29', "30-34", "35-50", '51-70', 'unknown']
# pd.cut builds right-closed intervals by default, so without
# include_lowest=True an age of exactly 18 would fall outside the first bin
# and become NaN even though the label reads '18-25'.
df['age_group'] = pd.cut(
    df['age'], ranges, labels=ranges_label, include_lowest=True
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## pip install gender-guesser | |
import gender_guesser.detector as gender | |
# Shared detector instance, created on first use by get_gender.
_detector = None


def get_gender(x):
    """Guess the gender associated with the first name ``x``.

    Args:
        x: First name as a string, in any letter case.

    Returns:
        Whatever ``gender.Detector.get_gender`` returns for the name after it
        has been normalised to lower case with a capitalised first letter.
    """
    global _detector
    if _detector is None:
        # Build the detector once and reuse it; the original constructed a
        # new Detector on every call, which is wasteful when the function is
        # applied to many names.
        _detector = gender.Detector()
    return _detector.get_gender(x.lower().capitalize())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# source: https://stackoverflow.com/a/52672859/5554394 | |
from IPython.display import clear_output | |
from matplotlib import pyplot as plt | |
import collections | |
%matplotlib inline | |
def live_plot(data_dict, figsize=(7,5), title=''): | |
clear_output(wait=True) | |
plt.figure(figsize=figsize) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Convert camel-case to snake-case in python. | |
e.g.: CamelCase -> snake_case | |
Relevant StackOverflow question: http://stackoverflow.com/a/1176023/293064 | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import warnings | |
# Silence every warning category for the rest of the session.
# simplefilter('ignore') installs the same catch-all "ignore" filter that
# filterwarnings('ignore') does when no message/module pattern is given, so a
# single call is sufficient.
warnings.simplefilter('ignore')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib as mpl | |
import matplotlib.pyplot as plt | |
large = 22; med = 16; small = 12 | |
params = {'axes.titlesize': large, | |
'legend.fontsize': med, | |
'figure.figsize': (16, 10), | |
'axes.labelsize': med, | |
'axes.titlesize': med, | |
'xtick.labelsize': med, |
OlderNewer