Skip to content

Instantly share code, notes, and snippets.

View khuangaf's full-sized avatar
:octocat:
Focusing

Kung-Hsiang Steeve Huang khuangaf

:octocat:
Focusing
View GitHub Profile
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Both files are read with header=None, so column names are assigned by hand.
df = pd.read_csv('../input/yoochoose-click.dat', header=None)
df.columns = ['session_id', 'timestamp', 'item_id', 'category']

buy_df = pd.read_csv('../input/yoochoose-buys.dat', header=None)
buy_df.columns = ['session_id', 'timestamp', 'item_id', 'price', 'quantity']

# Replace the raw item ids in the click frame with LabelEncoder's integer labels.
# Only df is re-encoded here; buy_df.item_id keeps its raw values.
item_encoder = LabelEncoder()
df['item_id'] = item_encoder.fit_transform(df.item_id)
import torch
from torch_geometric.data import Data
# 4 x 2 float tensor.
# Presumably the per-node feature matrix handed to torch_geometric's Data
# (the Data(...) call itself is not shown here -- confirm).
x = torch.tensor([[2, 1], [5, 6], [3, 7], [12, 0]], dtype=torch.float)

# Four float values, one per row of x; presumably the node labels.
y = torch.tensor([0, 1, 0, 1], dtype=torch.float)

# 2 x 5 long tensor; each column is one (row 0, row 1) index pair.
# These are the same five pairs as [[0, 2, 1, 0, 3], [3, 1, 0, 1, 2]],
# just listed in a different column order.
edge_index = torch.tensor([[0, 1, 2, 0, 3],
                           [1, 0, 1, 3, 2]], dtype=torch.long)
@khuangaf
khuangaf / CategoricalEncoder.py
Last active March 31, 2019 19:19
This class is intended for faster and simpler categorical/label encoding on large data, where sklearn's LabelEncoder might be too slow.
'''
Author: Kung-hsiang, Huang (Steeve)
Date: 2019/Mar/15
'''
class CategoricalEncoder():
    '''
    Encoder for people working with large data, where sklearn's LabelEncoder class may take too much time.

    This encoder is only suitable for 1-d arrays/lists. You may modify it to make it n-d compatible.
    '''
def forward(self, user_indices, item_indices, titles):
    '''
    Look up the user/item embeddings and build the MF interaction vector.

    Args:
        user_indices: indices fed to both user embedding tables.
        item_indices: indices fed to both item embedding tables.
        titles: not used in the part of the method shown here.

    NOTE(review): this excerpt ends after the MF dropout and returns nothing;
    the MLP embeddings computed below are not consumed yet. Presumably the
    rest of the method (not shown) uses them -- confirm against the full file.
    '''
    user_embedding_mlp = self.embedding_user_mlp(user_indices)
    item_embedding_mlp = self.embedding_item_mlp(item_indices)
    user_embedding_mf = self.embedding_user_mf(user_indices)
    item_embedding_mf = self.embedding_item_mf(item_indices)
    #### mf part
    # Element-wise product of the user and item MF embeddings.
    mf_vector = torch.mul(user_embedding_mf, item_embedding_mf)
    # Use the functional form tied to self.training: a torch.nn.Dropout built
    # inside forward() is always in training mode, so it would keep dropping
    # activations even after model.eval().
    mf_vector = torch.nn.functional.dropout(
        mf_vector, p=self.config.dropout_rate_mf, training=self.training)
def __init__(self, config):
    '''
    Create the four embedding tables (user/item for the MF part, user/item for the MLP part).

    Args:
        config: model configuration; kept on the instance as self.config.

    NOTE(review): self.num_users, self.num_items, self.latent_dim_mf and
    self.latent_dim_mlp are read below but not assigned in this excerpt.
    They must be defined elsewhere (e.g. taken from config) -- confirm.
    '''
    super(NeuMF, self).__init__()
    # forward() reads self.config.dropout_rate_mf, so the config has to be kept on the instance.
    self.config = config
    #mf part
    self.embedding_user_mf = torch.nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.latent_dim_mf)
    self.embedding_item_mf = torch.nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.latent_dim_mf)
    #mlp part
    self.embedding_user_mlp = torch.nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.latent_dim_mlp)
    self.embedding_item_mlp = torch.nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.latent_dim_mlp)
class NeuMF(torch.nn.Module):
def __init__(self, config):
super(NeuMF, self).__init__()
#mf part
self.embedding_user_mf = torch.nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.latent_dim_mf)
self.embedding_item_mf = torch.nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.latent_dim_mf)
#mlp part
self.embedding_user_mlp = torch.nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.latent_dim_mlp)