- If you already have a GCC version newer than 7.2, you can skip this step. Otherwise, install it:
brew install gcc
- Make sure the symbolic links are created, i.e.
/usr/local/bin/gcc -> gcc-7
/usr/local/bin/g++ -> g++-7
- I used version 7 of GCC, but a newer version will probably work too.
- Tell pip to compile the package from source, avoiding the pre-compiled binaries:
pip install --force-reinstall --ignore-installed --no-binary :all: pyrfr
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def gaussian(d, bw):
    """Evaluate a zero-mean Gaussian kernel of bandwidth ``bw`` at ``d``.

    Args:
        d: Value(s) at which to evaluate the kernel; a scalar or anything
            NumPy can broadcast against ``bw``.
        bw: Bandwidth of the kernel (plays the role of the standard
            deviation in the formula below).

    Returns:
        ``exp(-0.5 * (d / bw) ** 2) / (bw * sqrt(2 * pi))``, element-wise.
    """
    return np.exp(-0.5 * (d / bw) ** 2) / (bw * np.sqrt(2 * np.pi))
def meanshift_vec(points):
    """Perform one vectorized mean-shift step over every point at once.

    Uses the names ``poinc_dist_vec``, ``gaussian`` and ``sigma`` from the
    enclosing module scope.

    Args:
        points: 2-D array with one point per row.

    Returns:
        Array of the shifted points: each row is the weighted sum of all
        points divided by the corresponding summed weight.
    """
    dists = poinc_dist_vec(points)  # the matrix of the distances
    weights = gaussian(dists, sigma)  # the matrix of the weights
    expd_w = np.dot(weights, points)  # the weighted vectors
    # NOTE(review): the dot product combines the rows of `weights` while the
    # normalizer sums over axis 0; the two agree only when `weights` is
    # symmetric -- confirm that `poinc_dist_vec` returns a symmetric matrix.
    summed_weight = np.sum(weights, 0)  # summed weights, used to normalize
    shifted_pts = expd_w / np.expand_dims(summed_weight, 1)  # the normalized vectors
    return shifted_pts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dist_batch(points, n_begin, n_end): | |
expd=np.expand_dims(points,2) | |
tiled=np.tile(expd, n_end-n_begin) #we tile up only (n_end-n_begin) times | |
selected=points[n_begin:n_end] # we select only part (a batch) of the whole dataset | |
trans=np.transpose(selected) | |
diff=trans-tiled | |
num=np.sum(np.square(diff), axis=1) | |
den_sq_norm=1-np.sum(np.square(points),1) | |
den_selected=den_sq_norm[n_begin:n_end] # we select only part (a batch) of the whole dataset |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def __shift(points, sigma, beg, end):
    """Compute the mean-shifted positions for the batch ``points[beg:end]``.

    Args:
        points: 2-D array with one point per row (the whole dataset).
        sigma: Bandwidth passed to ``gaussian``.
        beg: Index of the first point of the batch, forwarded to ``dist_batch``.
        end: Index one past the last point of the batch, forwarded to
            ``dist_batch``.

    Returns:
        The weighted sum of all points for each batch element, divided by
        that element's summed weight.
    """
    dists = dist_batch(points, beg, end)  # distances for this batch only
    weights = gaussian(dists, sigma)
    expd_w = np.dot(weights.T, points)  # weighted sum of the points
    summed_weight = np.sum(weights, 0)  # normalizer for each weighted sum
    return expd_w / np.expand_dims(summed_weight, 1)  # the shifted points
def meanshift_batch(points, batches): | |
for i in range(0,n_samples,batches): # we need to loop for each batch, until reach the end | |
last=min(i+batches,n_samples) # we have to select which is the last vector for feeding the following functions |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def __shift(points, sigma, beg, end):
    """Compute the mean-shifted positions for the batch ``points[beg:end]``.

    ``beg`` and ``end`` are returned alongside the result so that a caller
    collecting batches out of order can tell which slice each result
    belongs to.

    Args:
        points: 2-D array with one point per row (the whole dataset).
        sigma: Bandwidth passed to ``gaussian``.
        beg: Index of the first point of the batch, forwarded to ``dist_batch``.
        end: Index one past the last point of the batch, forwarded to
            ``dist_batch``.

    Returns:
        A tuple ``(shifted, beg, end)`` where ``shifted`` is the weighted sum
        of all points for each batch element divided by its summed weight.
    """
    dists = dist_batch(points, beg, end)
    weights = gaussian(dists, sigma)
    expd_w = np.dot(weights.T, points)  # weighted sum of the points
    summed_weight = np.sum(weights, 0)  # normalizer for each weighted sum
    shifted = expd_w / np.expand_dims(summed_weight, 1)
    # Also return the positions of the starting and ending vectors.
    return shifted, beg, end
def meanshift_parallel(points, batches) | |
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: | |
future_shifting = {executor.submit(__shift, points, sigma, d_beg, min(d_beg+batches, n_samples)): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn_pandas import DataFrameMapper | |
from sklearn.preprocessing import StandardScaler, Imputer, MinMaxScaler, QuantileTransformer | |
# Load the data.
df = pd.read_csv("train.csv")
# Count the null features per user: one count per row, taken over the columns
# that `df_best_usrs_prv_dummy` also has.
# NOTE(review): `df_best_usrs_prv_dummy` is not defined in this snippet --
# confirm it exists before this line runs.
df["N/A"] = np.sum(df[list(df_best_usrs_prv_dummy.columns)].isna().values, axis=1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the dimensionality reduction.
embedding = umap.UMAP(n_neighbors=55).fit_transform(df_preprocessed.values)
# Plot the results: first embedding coordinate against the second.
# The original plotted `embedding_best[:, 0]` against `embedding_sel[:, 1]`,
# two different names that this snippet never defines, while the freshly
# computed `embedding` went unused; both axes now come from `embedding`.
plt.figure(figsize=(16, 6))
plt.scatter(embedding[:, 0], embedding[:, 1], color='b', s=10,
            alpha=.2, label="Customers", marker='o')
plt.legend(fontsize=16, numpoints=3)
plt.show()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from numba import jit | |
@jit
def example_function(arguments):
    # This function will be compiled by Numba.
    # The ellipsis is a placeholder for the real body.
    ...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@jit(nopython=True, nogil=True) | |
def dist_batch(points, n_begin, n_end): | |
#expd=np.expand_dims(points,2) | |
#tiled=np.tile(expd, n_end-n_begin) #we tile up only (n_end-n_begin) times | |
tiled = np.zeros((points.shape[0], points.shape[1], n_end-n_begin)) | |
for dim in range(n_end-n_begin): | |
tiled[:,:,dim]=points | |
selected=points[n_begin:n_end] # we select only part (a batch) of the whole dataset | |
trans=np.transpose(selected) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
def calc_beta_mode(a, b):
    """Calculate the mode (peak) of the Beta(a, b) distribution.

    The closed form ``(a - 1) / (a + b - 2)`` is the mode only when both
    ``a > 1`` and ``b > 1``; no validation is performed, so other inputs
    yield a value that is not a mode (and ``a + b == 2`` divides by zero).

    Args:
        a: First shape parameter of the Beta distribution.
        b: Second shape parameter of the Beta distribution.

    Returns:
        ``(a - 1) / (a + b - 2)``.
    """
    return (a - 1) / (a + b - 2)
def plot(betas, names, linf=0, lsup=0.01): | |
'''this function plots the Beta distribution''' | |
x=np.linspace(linf,lsup, 100) | |
for f, name in zip(betas,names) : |