This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import torch | |
| from torch import nn | |
| import torch.nn.functional as F | |
| class MF(nn.Module): | |
| def __call__(self, train_x): | |
| # These are the user and item indices | |
| user_id = train_x[:, 0] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training step: fit a gradient-boosted regressor on the training split,
# then produce predictions for the held-out split.
model = CatBoostRegressor()
# Learn the model parameters from the training data.
model.fit(train_data)
# Score the trained model on the test data.
y_pred = model.predict(test_data)
# Evaluation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Transform count matrices to a normalized tf-idf representation and add
# one summary feature per corpus: the mean tf-idf weight of each row.
transformer = TfidfTransformer()
# NOTE: fit_transform refits the IDF statistics independently for each
# corpus, so the two columns are on separately-fitted scales.
data_1 = transformer.fit_transform(data_cor1)
data_2 = transformer.fit_transform(data_cor2)
# BUG FIX: np.mean(sparse, 1) returns a 2-D np.matrix of shape (n, 1);
# pandas rejects it as a column ("Data must be 1-dimensional").
# Flatten to a plain 1-D ndarray before assignment — same values.
data['cor1_Tfidf'] = np.asarray(data_1.mean(axis=1)).ravel()
data['cor2_Tfidf'] = np.asarray(data_2.mean(axis=1)).ravel()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build one combined text corpus from the "name" and "item_description"
# columns (cast to unicode so NaN-free string handling is guaranteed).
name_docs = data.name.values.astype('U').tolist()
desc_docs = data.item_description.values.astype('U').tolist()
corpus = name_docs + desc_docs
# Token-count vectorizer: unigrams through trigrams, English stop words
# removed, and tokens kept only if they occur in at least 3 documents.
vectorizer = CountVectorizer(dtype=np.float32, stop_words='english', ngram_range=(1, 3), min_df=3)
# Learn the vocabulary of all tokens over the combined corpus.
vectorizer.fit(corpus)
# Transform documents to document-term matrix
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def association_rules(order_item, min_support): | |
| print("Starting order_item: {:22d}".format(len(order_item))) | |
| # Calculate item frequency and support | |
| item_stats = freq(order_item).to_frame("freq") | |
| item_stats['support'] = item_stats['freq'] / order_count(order_item) * 100 | |
| # Filter from order_item items below min support | |
| qualifying_items = item_stats[item_stats['support'] >= min_support].index |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def plot_confusion_matrix(data_pred_cls,data_predicted_cls): | |
| # This is called from print_test_accuracy() below. | |
| # cls_pred is an array of the predicted class-number for | |
| # all images in the test-set. | |
| # Get the confusion matrix using sklearn. | |
| cm = confusion_matrix(y_true=data_pred_cls, | |
| y_pred=data_predicted_cls) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def plot_images(images, cls_true, cls_pred=None): | |
| assert len(images) == len(cls_true) == 12 | |
| # Create figure with 4x3 sub-plots (12 panels, matching the assert above). | |
| fig, axes = plt.subplots(4, 3) | |
| fig.subplots_adjust(hspace=0.3, wspace=0.3) | |
| for i, ax in enumerate(axes.flat): | |
| # Plot image. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def optimize(num_iterations, X): | |
| global total_iterations | |
| start_time = time.time() | |
| #array to plot | |
| losses = {'train':[], 'validation':[]} | |
| for i in range(num_iterations): | |
| total_iterations += 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of samples drawn per training step by next_batch below.
batch_size = 50
| def next_batch(num, data, labels): | |
| ''' | |
| Return a total of `num` random samples and labels. | |
| ''' | |
| idx = np.arange(0 , len(data)) | |
| np.random.shuffle(idx) | |
| idx = idx[:num] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convolutional layer 1: 32 filters, each 5 x 5 pixels.
filter_size1 = 5
num_filters1 = 32

# Convolutional layer 2: 64 filters, each 4 x 4 pixels.
filter_size2 = 4
num_filters2 = 64