This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shuffle first so the contiguous slices below are random partitions
# rather than following the dataset's stored order.
dataset = dataset.shuffle()

# First 800,000 samples train, the next 100,000 validate, and whatever
# remains past index 900,000 is held out for testing.
train_dataset = dataset[:800000]
val_dataset = dataset[800000:900000]
test_dataset = dataset[900000:]

# Notebook-style display of the three split sizes.
len(train_dataset), len(val_dataset), len(test_dataset)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch_geometric.data import InMemoryDataset | |
from tqdm import tqdm | |
class YooChooseBinaryDataset(InMemoryDataset): | |
def __init__(self, root, transform=None, pre_transform=None): | |
super(YooChooseBinaryDataset, self).__init__(root, transform, pre_transform) | |
self.data, self.slices = torch.load(self.processed_paths[0]) | |
@property |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch_geometric.data import InMemoryDataset | |
from tqdm import tqdm | |
class YooChooseBinaryDataset(InMemoryDataset): | |
def process(self): | |
data_list = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch_geometric.data import InMemoryDataset | |
from tqdm import tqdm | |
class YooChooseBinaryDataset(InMemoryDataset): | |
def process(self): | |
data_list = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data_list = [] | |
# process by session_id | |
grouped = df.groupby('session_id') | |
for session_id, group in tqdm(grouped): | |
sess_item_id = LabelEncoder().fit_transform(group.item_id) | |
group = group.reset_index(drop=True) | |
group['sess_item_id'] = sess_item_id | |
node_features = group.loc[group.session_id==session_id,['sess_item_id','item_id']].sort_values('sess_item_id').item_id.drop_duplicates().values |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data_list = [] | |
# process by session_id | |
grouped = df.groupby('session_id') | |
for session_id, group in tqdm(grouped): | |
sess_item_id = LabelEncoder().fit_transform(group.item_id) | |
group = group.reset_index(drop=True) | |
group['sess_item_id'] = sess_item_id | |
node_features = group.loc[group.session_id==session_id,['sess_item_id','item_id']].sort_values('sess_item_id').item_id.drop_duplicates().values |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Randomly sample 1,000,000 distinct sessions and keep only their rows.
# NOTE: with replace=False, np.random.choice raises ValueError when df has
# fewer than 1,000,000 unique session ids.
sampled_session_id = np.random.choice(df.session_id.unique(), 1000000, replace=False)
df = df.loc[df.session_id.isin(sampled_session_id)]

# Notebook-style display of the per-column distinct-value counts.
df.nunique()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch.nn import Sequential as Seq, Linear, ReLU | |
from torch_geometric.nn import MessagePassing | |
class SAGEConv(MessagePassing): | |
def __init__(self, in_channels, out_channels): | |
super(SAGEConv, self).__init__(aggr='max') # "Max" aggregation. | |
self.lin = torch.nn.Linear(in_channels, out_channels) | |
self.act = torch.nn.ReLU() | |
self.update_weight = Parameter(torch.Tensor(in_channels + out_channels, in_channels)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch_geometric.data import InMemoryDataset | |
class MyOwnDataset(InMemoryDataset): | |
def __init__(self, root, transform=None, pre_transform=None): | |
super(MyOwnDataset, self).__init__(root, transform, pre_transform) | |
self.data, self.slices = torch.load(self.processed_paths[0]) | |
@property |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mark each row True when its session_id also appears in buy_df.session_id,
# False otherwise.
df['label'] = df.session_id.isin(buy_df.session_id)

# Notebook-style preview of the first rows, including the new column.
df.head()