Skip to content

Instantly share code, notes, and snippets.

View khanhnamle1994's full-sized avatar
🎯
Focusing

James Le khanhnamle1994

🎯
Focusing
View GitHub Profile
@khanhnamle1994
khanhnamle1994 / VMF.py
Created February 22, 2020 21:59
Variational Matrix Factorization class
import torch
from torch import nn
import torch.nn.functional as F
# Matrix-factorization recommender model (gist preview is truncated:
# only the first lines of __call__ are visible here).
# NOTE(review): indentation was lost when this page was scraped — the
# lines below belong inside the class / method bodies.
class MF(nn.Module):
def __call__(self, train_x):
# These are the user and item indices
# Column 0 of train_x holds the user ids; presumably column 1 holds
# item ids (not visible in this fragment) — TODO confirm.
user_id = train_x[:, 0]
@khanhnamle1994
khanhnamle1994 / CatBoost3.py
Last active January 7, 2020 04:02
Third Code Snippet For CatBoost Article
# Training Step
# Build a CatBoost regressor with default hyper-parameters.
model = CatBoostRegressor()
# Fit the model on the train data.
# NOTE(review): fit() is called with a single argument, so train_data is
# presumably a catboost.Pool bundling features and labels — verify.
model.fit(train_data)
# Predict on the test data
y_pred = model.predict(test_data)
# Evaluation
@khanhnamle1994
khanhnamle1994 / CatBoost2.py
Created December 17, 2019 16:04
Second Code Snippet For CatBoost Article
# Transform a count matrix to a normalized tf or tf-idf representation
transformer = TfidfTransformer()
# Fit to data matrices, then transform them
# NOTE(review): fit_transform is run independently on each corpus, so each
# gets its own idf statistics — confirm that is intended rather than
# fitting once and transforming both.
data_1 = transformer.fit_transform(data_cor1)
data_2 = transformer.fit_transform(data_cor2)
# Create a new column from these values
# Row-wise mean (axis=1) of the tf-idf matrix, one scalar per document.
# NOTE(review): fit_transform returns a sparse matrix; np.mean over it
# yields an np.matrix column — verify it assigns cleanly into the frame.
data['cor1_Tfidf'] = np.mean(data_1, 1)
data['cor2_Tfidf'] = np.mean(data_2, 1)
@khanhnamle1994
khanhnamle1994 / CatBoost1.py
Created December 17, 2019 16:02
First Code Snippet For CatBoost Article
# Making the corpus of text based on "name" and "item_description" columns
# astype('U') forces unicode strings so NaN/object cells don't break the
# vectorizer; the two column lists are simply concatenated.
corpus = data.name.values.astype('U').tolist() + data.item_description.values.astype('U').tolist()
# Convert a collection of text documents to a matrix of token counts
# float32 counts, English stop words removed, uni- to tri-grams, and
# tokens must appear in at least 3 documents (min_df=3).
vectorizer = CountVectorizer(dtype=np.float32, stop_words='english', ngram_range=(1, 3), min_df=3)
# Learn a vocabulary dictionary of all tokens in the raw documents
vectorizer.fit(corpus)
# Transform documents to document-term matrix
@khanhnamle1994
khanhnamle1994 / association_rules.py
Created October 2, 2019 19:37
Association rules function for the Instacart blog post
# Compute association rules over (order, item) pairs, pruning items whose
# support falls below min_support. Fragment is truncated — only the
# support-filter stage is visible here.
# NOTE(review): indentation was lost in this scraped snippet; the lines
# below belong inside the function body. freq() and order_count() are
# helpers defined elsewhere in the original post.
def association_rules(order_item, min_support):
print("Starting order_item: {:22d}".format(len(order_item)))
# Calculate item frequency and support
item_stats = freq(order_item).to_frame("freq")
# Support as a percentage of all orders.
item_stats['support'] = item_stats['freq'] / order_count(order_item) * 100
# Filter from order_item items below min support
qualifying_items = item_stats[item_stats['support'] >= min_support].index
# Build (and, in the truncated remainder, presumably plot) a confusion
# matrix for the test-set predictions.
# NOTE(review): indentation was lost in this scraped snippet, and the
# function body is cut off after the confusion_matrix call.
def plot_confusion_matrix(data_pred_cls,data_predicted_cls):
# This is called from print_test_accuracy() below.
# cls_pred is an array of the predicted class-number for
# all images in the test-set.
# Get the confusion matrix using sklearn.
# NOTE(review): the first parameter is passed as y_true even though its
# name contains "pred" — the naming is misleading; confirm against caller.
cm = confusion_matrix(y_true=data_pred_cls,
y_pred=data_predicted_cls)
# Plot a fixed grid of 12 images with their true (and optionally
# predicted) class labels.
# NOTE(review): indentation was lost in this scraped snippet and the loop
# body is truncated after the first comment.
def plot_images(images, cls_true, cls_pred=None):
# Exactly 12 images are expected, matching the 4x3 grid below.
assert len(images) == len(cls_true) == 12
# Create figure with 4x3 sub-plots.
fig, axes = plt.subplots(4, 3)
fig.subplots_adjust(hspace=0.3, wspace=0.3)
for i, ax in enumerate(axes.flat):
# Plot image.
# Run num_iterations training steps, tracking losses for later plotting.
# Fragment is truncated — only the loop header and counters are visible.
# NOTE(review): indentation was lost in this scraped snippet; the lines
# below belong inside the function / loop bodies.
def optimize(num_iterations, X):
# total_iterations is a module-level counter shared across calls.
global total_iterations
start_time = time.time()
#array to plot
losses = {'train':[], 'validation':[]}
for i in range(num_iterations):
total_iterations += 1
# NOTE(review): batch_size is re-assigned every iteration although it is
# loop-invariant — could be hoisted above the loop.
batch_size = 50
#function next_batch
def next_batch(num, data, labels):
    '''
    Return a total of `num` random samples and labels.

    Parameters
    ----------
    num : int
        Number of samples to draw (effectively capped at len(data)).
    data : sequence
        Samples, indexable in lockstep with `labels`.
    labels : sequence
        One label per entry of `data`.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The drawn samples and their matching labels in the same shuffled
        order, so the sample/label pairing is preserved.
    '''
    # Shuffle all indices in place, then keep the first `num` of them.
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    # Gather data and labels with the SAME indices so pairs stay aligned.
    # (This tail was cut off in the scraped fragment; restored here, along
    # with the indentation the scrape destroyed.)
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    return np.asarray(data_shuffle), np.asarray(labels_shuffle)
# Hyper-parameters for two convolutional layers, presumably consumed by
# network-construction code defined elsewhere in the original source.
# Convolutional Layer 1.
filter_size1 = 5 # Convolution filters are 5 x 5 pixels.
num_filters1 = 32 # There are 32 of these filters.
# Convolutional Layer 2.
filter_size2 = 4 # Convolution filters are 4 x 4 pixels.
num_filters2 = 64 # There are 64 of these filters.