Elliptic GCN model
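The gist refers to `nodes`, `edges`, `X` and `y` without defining them; they come from preprocessing the Elliptic Bitcoin dataset beforehand. A minimal sketch of that step is shown below, where the file names, column names and label mapping are assumptions for illustration and not part of the original gist:

# Preprocessing sketch (assumptions): builds the `nodes`, `edges`, `X`, `y`
# objects used below from the Kaggle Elliptic dataset CSVs. File names,
# column names and the label mapping are illustrative, not from the gist.
import pandas as pd

features = pd.read_csv('elliptic_txs_features.csv', header=None)  # assumed file name
classes = pd.read_csv('elliptic_txs_classes.csv')                  # assumed file name
edgelist = pd.read_csv('elliptic_txs_edgelist.csv')                # assumed file name

# keep only labelled transactions and encode the label as 0/1 (assumed mapping)
df = features.merge(classes, left_on=0, right_on='txId', how='inner')
df = df[df['class'] != 'unknown']
df['class'] = df['class'].map({'1': 1, '2': 0})

nodes = df[[0]]                                    # transaction ids (first column)
X = df.drop(columns=[0, 'txId', 'class'])          # node features
y = df['class']                                    # labels (1 = illicit, 0 = licit)
edges = edgelist[edgelist['txId1'].isin(df[0]) &   # assumed column names
                 edgelist['txId2'].isin(df[0])]
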
import torch
import numpy as np
import pandas as pd
import networkx as nx
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.transforms as T
from sklearn import metrics
from torch_geometric.nn import ChebConv
from torch_geometric.data import InMemoryDataset, Data
from sklearn.model_selection import train_test_split
# create a NetworkX graph
# (assumes `nodes`, `edges`, `X` and `y` were prepared from the Elliptic dataset, see sketch above)
G = nx.DiGraph()
G.add_nodes_from(nodes.iloc[:, 0].values)
G.add_edges_from(edges.values)
labels = y.values

# create edge index in COO format for PyTorch Geometric
adj = nx.to_scipy_sparse_matrix(G).tocoo()
row = torch.from_numpy(adj.row.astype(np.int64)).to(torch.long)
col = torch.from_numpy(adj.col.astype(np.int64)).to(torch.long)
edge_index = torch.stack([row, col], dim=0)

# prepare the embeddings
embeddings = torch.from_numpy(np.stack(X.values))
# custom dataset
class BitcoinDataset(InMemoryDataset):
    def __init__(self, transform=None):
        super(BitcoinDataset, self).__init__('.', transform, None, None)

        data = Data(edge_index=edge_index)
        data.num_nodes = G.number_of_nodes()

        # embedding
        data.x = embeddings.type(torch.float32)

        # labels
        y = torch.from_numpy(labels).type(torch.long)
        data.y = y.clone().detach()
        data.num_classes = 2

        # splitting the data into train and test
        X_train, X_test, y_train, y_test = train_test_split(
            pd.Series(list(G.nodes())),
            pd.Series(labels),
            stratify=pd.Series(labels),
            test_size=0.20,
            random_state=42)

        n_nodes = G.number_of_nodes()

        # create train and test masks for data
        train_mask = torch.zeros(n_nodes, dtype=torch.bool)
        test_mask = torch.zeros(n_nodes, dtype=torch.bool)
        train_mask[X_train.index] = True
        test_mask[X_test.index] = True
        data['train_mask'] = train_mask
        data['test_mask'] = test_mask

        self.data, self.slices = self.collate([data])

    def _download(self):
        return

    def _process(self):
        return

    def __repr__(self):
        return '{}()'.format(self.__class__.__name__)
dataset = BitcoinDataset()
data = dataset[0]

# 2-layer GCN built from Chebyshev spectral graph convolutions (ChebConv)
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = ChebConv(data.num_features, 16, K=2)
        self.conv2 = ChebConv(16, int(data.num_classes), K=2)

    def forward(self):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)
model = Net().to(device)

torch.manual_seed(42)
optimizer_name = "Adam"
lr = 1e-1
optimizer = getattr(torch.optim, optimizer_name)(model.parameters(), lr=lr)
epochs = 200
# train GCN
def train():
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()

@torch.no_grad()
def test():
    model.eval()
    logits = model()

    # move predictions and labels to CPU before passing them to scikit-learn
    mask_train = data['train_mask']
    y_pred_train = logits[mask_train].max(1)[1].cpu()
    mask_test = data['test_mask']
    y_pred_test = logits[mask_test].max(1)[1].cpu()
    y_train = data.y[mask_train].cpu()
    y_test = data.y[mask_test].cpu()

    print('***** GCN MODEL *****')
    print('ACC: Train:', metrics.accuracy_score(y_train, y_pred_train).round(3),
          'Test:', metrics.accuracy_score(y_test, y_pred_test).round(3))
    print('ROC: Train:', metrics.roc_auc_score(y_train, y_pred_train).round(3),
          'Test:', metrics.roc_auc_score(y_test, y_pred_test).round(3))
    print('F1: Train:', metrics.f1_score(y_train, y_pred_train, pos_label=0).round(3),
          'Test:', metrics.f1_score(y_test, y_pred_test, pos_label=0).round(3))
    print('**********************')

for epoch in range(1, epochs):
    train()
test()