This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from scipy.stats import entropy | |
from math import log, e | |
import pandas as pd | |
import timeit | |
def entropy1(labels, base=None):
    """Compute the entropy of the distribution of values in ``labels``.

    The occurrences of each distinct value are counted with ``np.unique``
    and the counts are handed to ``scipy.stats.entropy``, which normalises
    them to probabilities before computing the entropy.

    Args:
        labels: Array-like of observations.
        base: Logarithm base forwarded to ``scipy.stats.entropy``. ``None``
            leaves SciPy's default (natural logarithm) in effect.

    Returns:
        The entropy of the empirical label distribution.
    """
    # Only the per-value frequencies matter; the distinct values are dropped.
    frequencies = np.unique(labels, return_counts=True)[1]
    return entropy(frequencies, base=base)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def specificity_scorer(y_true, y_pred):
    """Return the specificity (true-negative rate), ``tn / (tn + fp)``.

    The counts are unpacked from the flattened result of
    ``metrics.confusion_matrix(y_true, y_pred)`` in the order
    ``tn, fp, fn, tp``.

    NOTE(review): ``metrics`` is not imported in the visible snippet;
    presumably it is ``sklearn.metrics`` -- confirm.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        The fraction ``tn / (tn + fp)``.

    Raises:
        ValueError: If the flattened confusion matrix does not contain
            exactly four cells (the unpacking fails).
    """
    cells = metrics.confusion_matrix(y_true, y_pred).ravel()
    # Only the first two cells are used; the rest are unpacked to enforce
    # the four-cell shape exactly as before.
    true_neg, false_pos, _false_neg, _true_pos = cells
    return true_neg / (true_neg + false_pos)
specificity = metrics.make_scorer(specificity_scorer, greater_is_better=True) | |
# use in cross_val_score | |
cv = cross_val_score(xgb, dt.features_.values, y, scoring=specificity, cv=5, verbose=2) | |
# use in GridSearchCV |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import statsmodels.api as sm | |
import numpy as np | |
import rpy2.robjects.packages as rpackages | |
import rpy2.robjects as robjects | |
rstats = rpackages.importr('stats') | |
s1 = 1556 | |
n1 = 2455 | |
s2 = 1671 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> corr | |
Impressions Clicks CTR Avg. CPC Cost Avg. position \ | |
Impressions 1.000000 0.599646 NaN 0.301556 0.568137 0.197353 | |
Clicks 0.599646 1.000000 NaN 0.566357 0.987073 0.627268 | |
CTR NaN NaN NaN NaN NaN NaN | |
Avg. CPC 0.301556 0.566357 NaN 1.000000 0.663789 0.809944 | |
Cost 0.568137 0.987073 NaN 0.663789 1.000000 0.707918 | |
Avg. position 0.197353 0.627268 NaN 0.809944 0.707918 1.000000 | |
Conversions 0.558450 0.927165 NaN 0.717237 0.962034 0.746493 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import statsmodels.api as sm | |
from statsmodels.tsa.stattools import adfuller | |
def test_stationarity(timeseries): | |
''' | |
timeseries has a date index and a single column of numeric values | |
''' | |
#Determining rolling statistics |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
df = pd.DataFrame( | |
{'Label': [np.nan, np.nan, 'Label1', 'Label2'], | |
'URL': ['https://www.website.com/where-to-buy', | |
np.nan, np.nan, 'https://www.website.com/store'] | |
}, columns=['Label', 'URL']) | |
# this does not actually replace inplace! | |
df[['Label', 'URL']].fillna('', inplace=True) | |
# you have to assign it or use a dict |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from matplotlib.colors import ListedColormap | |
# A random colormap for matplotlib: 256 colours, each a random RGB triple.
rand_cmap = ListedColormap(np.random.rand(256, 3))
for _ in range(5):
    # `cmap` only takes effect when `c` supplies scalar values to map onto
    # it; without `c` scatter ignores the colormap. Give every point a
    # random scalar so the random colormap is actually used.
    plt.scatter(
        [1, 2, 3],
        np.random.randn(3),
        c=np.random.rand(3),
        s=10,
        cmap=rand_cmap,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from scipy.optimize import curve_fit | |
df = pd.DataFrame({ | |
'y': [0.996559203, 0.99161362, 0.9925214090000001, 0.986498352, | |
0.9826329420000001, 0.977550635, 0.9542758440000001, 0.941359915, | |
0.933388103, 0.929990698, 0.920058004, 0.90789857, 0.909764261, | |
0.8944469829999999, 0.912682288, 0.913135466, 0.913485262, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ma_subplots(df, window, title=None): | |
fig, ax = plt.subplots(df.shape[1], 1, figsize=(12, 7)) | |
ax = ax.ravel() | |
for i,col in enumerate(df): | |
mean = df[col].mean() | |
ma = df[col].rolling(window).mean() | |
mstd = df[col].rolling(window).std() | |
ax[i].plot(df.index, df[col], color='k', label=col) | |
ax[i].plot(ma.index, ma, 'b', label='Moving Avg.') | |
ax[i].fill_between(mstd.index, ma - 2*mstd, ma + 2*mstd, color='b', alpha=0.2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def scale_between(series, min_amt, max_amt):
    """Linearly rescale ``series`` so its values span ``[min_amt, max_amt]``.

    The minimum of ``series`` maps to ``min_amt`` and the maximum maps to
    ``max_amt``; values in between are interpolated linearly.

    Args:
        series: Object exposing ``min()`` / ``max()`` and element-wise
            arithmetic (e.g. a pandas Series or a NumPy array).
        min_amt: Lower bound of the target range.
        max_amt: Upper bound of the target range.

    Returns:
        The rescaled values, in the same container type as ``series``.
        If every value in ``series`` is identical, all results equal
        ``min_amt``.
    """
    series_min = series.min()
    series_max = series.max()
    span = series_max - series_min
    # A constant input has zero range, and dividing by it would turn every
    # value into NaN. Treat the range as 1 so such input maps to min_amt.
    # Only scalar ranges are checked; when min()/max() return non-scalars
    # the element-wise division is left exactly as it was.
    if getattr(span, "ndim", 0) == 0 and span == 0:
        span = 1
    return ((max_amt - min_amt) * (series - series_min)) / span + min_amt
OlderNewer