Skip to content

Instantly share code, notes, and snippets.

View khanhnamle1994's full-sized avatar
🎯
Focusing

James Le khanhnamle1994

🎯
Focusing
View GitHub Profile
@khanhnamle1994
khanhnamle1994 / VMF.py
Created February 22, 2020 21:59
Variational Matrix Factorization class
import torch
from torch import nn
import torch.nn.functional as F
# Matrix-factorization recommender model (gist preview is truncated:
# only the first lines of __call__ are visible here).
# NOTE(review): indentation was lost when this page was scraped — the
# lines below belong inside the class / method bodies.
class MF(nn.Module):
def __call__(self, train_x):
# These are the user and item indices
# Column 0 of train_x holds the user ids; presumably column 1 holds
# item ids (not visible in this fragment) — TODO confirm.
user_id = train_x[:, 0]
@khanhnamle1994
khanhnamle1994 / CatBoost3.py
Last active January 7, 2020 04:02
Third Code Snippet For CatBoost Article
# Training Step
# Build a CatBoost regressor with default hyper-parameters.
model = CatBoostRegressor()
# Fit the model on the train data.
# NOTE(review): fit() is called with a single argument, so train_data is
# presumably a catboost.Pool bundling features and labels — verify.
model.fit(train_data)
# Predict on the test data
y_pred = model.predict(test_data)
# Evaluation
@khanhnamle1994
khanhnamle1994 / CatBoost2.py
Created December 17, 2019 16:04
Second Code Snippet For CatBoost Article
# Transform a count matrix to a normalized tf or tf-idf representation
transformer = TfidfTransformer()
# Fit to data matrices, then transform them
# NOTE(review): fit_transform is run independently on each corpus, so each
# gets its own idf statistics — confirm that is intended rather than
# fitting once and transforming both.
data_1 = transformer.fit_transform(data_cor1)
data_2 = transformer.fit_transform(data_cor2)
# Create a new column from these values
# Row-wise mean (axis=1) of the tf-idf matrix, one scalar per document.
# NOTE(review): fit_transform returns a sparse matrix; np.mean over it
# yields an np.matrix column — verify it assigns cleanly into the frame.
data['cor1_Tfidf'] = np.mean(data_1, 1)
data['cor2_Tfidf'] = np.mean(data_2, 1)
@khanhnamle1994
khanhnamle1994 / CatBoost1.py
Created December 17, 2019 16:02
First Code Snippet For CatBoost Article
# Making the corpus of text based on "name" and "item_description" columns
# astype('U') forces unicode strings so NaN/object cells don't break the
# vectorizer; the two column lists are simply concatenated.
corpus = data.name.values.astype('U').tolist() + data.item_description.values.astype('U').tolist()
# Convert a collection of text documents to a matrix of token counts
# float32 counts, English stop words removed, uni- to tri-grams, and
# tokens must appear in at least 3 documents (min_df=3).
vectorizer = CountVectorizer(dtype=np.float32, stop_words='english', ngram_range=(1, 3), min_df=3)
# Learn a vocabulary dictionary of all tokens in the raw documents
vectorizer.fit(corpus)
# Transform documents to document-term matrix
@khanhnamle1994
khanhnamle1994 / association_rules.py
Created October 2, 2019 19:37
Association rules function for the Instacart blog post
# Compute association rules over (order, item) pairs, pruning items whose
# support falls below min_support. Fragment is truncated — only the
# support-filter stage is visible here.
# NOTE(review): indentation was lost in this scraped snippet; the lines
# below belong inside the function body. freq() and order_count() are
# helpers defined elsewhere in the original post.
def association_rules(order_item, min_support):
print("Starting order_item: {:22d}".format(len(order_item)))
# Calculate item frequency and support
item_stats = freq(order_item).to_frame("freq")
# Support as a percentage of all orders.
item_stats['support'] = item_stats['freq'] / order_count(order_item) * 100
# Filter from order_item items below min support
qualifying_items = item_stats[item_stats['support'] >= min_support].index
# Build (and, in the truncated remainder, presumably plot) a confusion
# matrix for the test-set predictions.
# NOTE(review): indentation was lost in this scraped snippet, and the
# function body is cut off after the confusion_matrix call.
def plot_confusion_matrix(data_pred_cls,data_predicted_cls):
# This is called from print_test_accuracy() below.
# cls_pred is an array of the predicted class-number for
# all images in the test-set.
# Get the confusion matrix using sklearn.
# NOTE(review): the first parameter is passed as y_true even though its
# name contains "pred" — the naming is misleading; confirm against caller.
cm = confusion_matrix(y_true=data_pred_cls,
y_pred=data_predicted_cls)
# Plot a fixed grid of 12 images with their true (and optionally
# predicted) class labels.
# NOTE(review): indentation was lost in this scraped snippet and the loop
# body is truncated after the first comment.
def plot_images(images, cls_true, cls_pred=None):
# Exactly 12 images are expected, matching the 4x3 grid below.
assert len(images) == len(cls_true) == 12
# Create figure with 4x3 sub-plots.
fig, axes = plt.subplots(4, 3)
fig.subplots_adjust(hspace=0.3, wspace=0.3)
for i, ax in enumerate(axes.flat):
# Plot image.
# Run num_iterations training steps, tracking losses for later plotting.
# Fragment is truncated — only the loop header and counters are visible.
# NOTE(review): indentation was lost in this scraped snippet; the lines
# below belong inside the function / loop bodies.
def optimize(num_iterations, X):
# total_iterations is a module-level counter shared across calls.
global total_iterations
start_time = time.time()
#array to plot
losses = {'train':[], 'validation':[]}
for i in range(num_iterations):
total_iterations += 1
# NOTE(review): batch_size is re-assigned every iteration although it is
# loop-invariant — could be hoisted above the loop.
batch_size = 50
#function next_batch
def next_batch(num, data, labels):
    '''
    Return a total of `num` random samples and labels.

    Parameters
    ----------
    num : int
        Number of samples to draw (effectively capped at len(data)).
    data : sequence
        Samples, indexable in lockstep with `labels`.
    labels : sequence
        One label per entry of `data`.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The drawn samples and their matching labels in the same shuffled
        order, so the sample/label pairing is preserved.
    '''
    # Shuffle all indices in place, then keep the first `num` of them.
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    # Gather data and labels with the SAME indices so pairs stay aligned.
    # (This tail was cut off in the scraped fragment; restored here, along
    # with the indentation the scrape destroyed.)
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    return np.asarray(data_shuffle), np.asarray(labels_shuffle)
# Hyper-parameters for two convolutional layers, presumably consumed by
# network-construction code defined elsewhere in the original source.
# Convolutional Layer 1.
filter_size1 = 5 # Convolution filters are 5 x 5 pixels.
num_filters1 = 32 # There are 32 of these filters.
# Convolutional Layer 2.
filter_size2 = 4 # Convolution filters are 4 x 4 pixels.
num_filters2 = 64 # There are 64 of these filters.