Skip to content

Instantly share code, notes, and snippets.

from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator, TransformerMixin
class OneHotEconderByColumn(TransformerMixin, BaseEstimator):
def __init__(self, *, columns):
self.columns = columns
self.ohe = OneHotEncoder(drop="first", sparse=False)
def fit(self, X, y=None):
def calculate_distance_one_point(city, position, cache, value=-1):
row, col = position
if (row, col) in cache:
print( f"Already {row} {col}" )
return None
cache.add((row, col))
print("doing", row, col)
@timotta
timotta / word2vec_to_indexed_dataframe.py
Created July 29, 2021 12:42
Gensim word2vec indexed words in a dataframe, dealing with padding and unknown values
# Needs EMBED_SIZE and SENTENCE_SIZE
# df.text_array is a column with a list of words in each cell
w2v_model = Word2Vec(
sentences=df.text_array,
vector_size=EMBED_SIZE,
window=5,
min_count=1,
workers=4,
seed=1982,
@timotta
timotta / lstm_binary_pytorch_skorch.py
Last active July 29, 2021 12:32
LSTM binary classification using pytorch and skorch, and pretrained gensim word2vec
# Needs gensim w2v_model trained
# EMBED_SIZE defined
# X should be a Matrix with examples on rows, and word indexes in sequence as columns
class LSTMClassification(nn.Module):
def __init__(self, embed_vector, hidden_dim=100, dropout=0.5):
super(LSTMClassification, self).__init__()
self.n_layers = 1
self.output_size = 1
self.hidden_dim = hidden_dim
@timotta
timotta / colour_cell_pandas.py
Created July 29, 2021 12:28
Colour the best cell from column in pandas DataFrame
def color_the_best(x):
    """Return one CSS style string per element of *x*, highlighting the maximum.

    Every element equal to the largest value gets
    ``'background-color: lightgreen'``; every other element gets ``''``.
    Tied maxima are all highlighted.

    Args:
        x: An iterable of mutually comparable values.

    Returns:
        A list of style strings in the same order as the elements of *x*.
        An empty input yields an empty list.
    """
    # default=None keeps max() from raising ValueError on empty input; the
    # comprehension then has nothing to iterate over and returns [].
    best = max(x, default=None)
    return [
        'background-color: lightgreen' if value == best else ''
        for value in x
    ]
# Style df by running color_the_best over each row (axis=1).
df.style.apply(color_the_best, axis=1)
Algoritmo A Algoritmo B Ranking mais ideal ainda
1 Nintendo Switch Colorido (relevância = 1) Nintendo Switch Colorido (relevância = 1) Um nintendo switch cinza (relevância = 2)
2 Nintendo WiiU (relevância = 0) Nintendo Switch Cinza (relevância = 2) Outro nintendo switch cinza (relevância = 2)
3 XBox Slim (relevância = 0) Nintendo WiiU (relevância = 0) Mais um nintendo switch cinza (relevância = 2)
Algoritmo A Ranking Ideal A Algoritmo B Ranking Ideal B
1 Nintendo Switch Colorido (relevância = 1) Nintendo Switch Colorido (relevância = 1) Nintendo Switch Colorido (relevância = 1) Nintendo Switch Cinza (relevância = 2)
2 Nintendo WiiU (relevância = 0) XBOX Slim (relevância = 0) Nintendo Switch Cinza (relevância = 2) Nintendo Switch Colorido (relevância = 1)
3 XBOX Slim (relevância = 0) Nintendo WiiU (relevância = 0) XBox Slim (relevância = 0) XBox Slim (relevância = 0)
Algoritmo A Algoritmo B Ranking Ideal
1 Vendo nintendo switch colorido (relevância = 1) Nintendo Switch Cinza (relevância = 2) Nintendo Switch Cinza (relevância = 2)
2 Nintendo Wii U (relevância = 0) Nintendo Wii U (relevância = 0) Vendo nintendo switch colorido (relevância = 1)
3 Nintendo Switch Cinza (relevância = 2) Vendo nintendo switch colorido (relevância = 1) Nintendo Wii U (relevância = 0)
Algoritmo A Algoritmo B
1 Vendo nintendo switch colorido (relevância = 1) Nintendo Switch Cinza (relevância = 2)
2 Nintendo Wii U (relevância = 0) Nintendo Wii U (relevância = 0)
3 Nintendo Switch Cinza (relevância = 2) Vendo nintendo switch colorido (relevância = 1)
rDCG^{k,n} =
\frac {
\sum_{i=1}^{n}\frac{rel_{i}}{\log_{2}(i+1)}
}{
\sum_{i=1}^{k}\frac{rel_{max}}{\log_{2}(i+1)}
}