chrisalbon · May 29, 2023 20:11
diff --git a/gistfile1.py b/gistfile1.py
 """
 This module imports necessary libraries from PyTorch, torchvision, and sets up the device configuration for CUDA if available.
 It then sets hyperparameters for an application of a fully connected neural network on the MNIST dataset of handwritten digits.
 The MNIST dataset is loaded as separate training and test datasets, with each image transformed into a tensor.

 The fully connected neural network architecture is defined with one hidden layer, after which the model is instantiated.
 CrossEntropyLoss is used as the loss function, and Adam optimizer is used for optimization.

 Training is done for the specified number of epochs, where for each epoch, forward pass is performed, loss is calculated,
 backpropagation is applied, and the model parameters are updated.

 After training, the model is tested on the test dataset without computing gradients to check the accuracy of the model
 on the test data. Finally, the model's state_dict is saved as a checkpoint.
 """

 import time

 import torch
 import torch.nn as nn
 import torchvision
 import torchvision.transforms as transforms

 # Pick the compute device: the GPU when CUDA is usable, otherwise the CPU
 if torch.cuda.is_available():
    device = torch.device("cuda")
 else:
    device = torch.device("cpu")

 print(device)

 # Hyper-parameters
 input_size = 784  # features per sample given to the first linear layer
 hidden_size = 500  # units in the hidden layer
 num_classes = 10  # scores produced by the output layer
 num_epochs = 10  # full passes over the training loader
 batch_size = 100  # samples per mini-batch in both loaders
 learning_rate = 0.001  # learning rate passed to the optimizer

 # MNIST dataset: the training split is downloaded into "data" when missing;
 # every image is converted to a tensor on access.
 train_dataset = torchvision.datasets.MNIST(
    root="data",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
 )

 test_dataset = torchvision.datasets.MNIST(
    root="data",
    train=False,
    transform=transforms.ToTensor(),
 )

 # Data loaders: only the training batches are shuffled
 train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
 )

 test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
 )

 # Fully connected neural network with one hidden layer
 class NeuralNet(nn.Module):
    """Feed-forward network: linear layer, ReLU, then a second linear layer.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of scores produced for each input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output of the second linear layer for ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)


 # Build the network and move its parameters to the selected device
 model = NeuralNet(input_size, hidden_size, num_classes).to(device)

 # Loss and optimizer
 criterion = nn.CrossEntropyLoss()
 optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

 # Train the model
 total_step = len(train_loader)

 # perf_counter() is monotonic, so the measured duration cannot be skewed by
 # system clock adjustments the way a difference of time.time() values can.
 start_time = time.perf_counter()

 for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every image to input_size values (the width of the first
        # layer) and move the batch to the configured device
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report progress every 100 mini-batches
        if (i + 1) % 100 == 0:
            print(
                f"Epoch [{epoch + 1}/{num_epochs}], "
                f"Step [{i + 1}/{total_step}], Loss: {loss.item():.4f}"
            )

 # CUDA kernels run asynchronously; wait for queued work to finish so the
 # elapsed time covers the whole training run.
 if device.type == "cuda":
    torch.cuda.synchronize()

 end_time = time.perf_counter()

 print(f"Total execution time: {end_time - start_time:.2f} seconds")

 # Test the model
 # Switch to evaluation mode first. NeuralNet has no dropout or batch-norm
 # layers, so results are unchanged today, but evaluation stays correct if
 # such layers are added later.
 model.eval()

 # In test phase, we don't need to compute gradients (for memory efficiency)
 with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class is the index of the largest score in each row
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

 # Report the number of images actually evaluated rather than a fixed count
 accuracy = 100 * correct / total
 print(f"Accuracy of the network on the {total} test images: {accuracy} %")

 # Save the model checkpoint
 torch.save(model.state_dict(), "model.ckpt")
	"""
	This module imports necessary libraries from PyTorch, torchvision, and sets up the device configuration for CUDA if available.
	It then sets hyperparameters for an application of a fully connected neural network on the MNIST dataset of handwritten digits.
	The MNIST dataset is loaded as separate training and test datasets, with each image transformed into a tensor.

	The fully connected neural network architecture is defined with one hidden layer, after which the model is instantiated.
	CrossEntropyLoss is used as the loss function, and Adam optimizer is used for optimization.

	Training is done for the specified number of epochs, where for each epoch, forward pass is performed, loss is calculated,
	backpropagation is applied, and the model parameters are updated.

	After training, the model is tested on the test dataset without computing gradients to check the accuracy of the model
	on the test data. Finally, the model's state_dict is saved as a checkpoint.
	"""

	import time

	import torch
	import torch.nn as nn
	import torchvision
	import torchvision.transforms as transforms

	# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")

	print(device)

	# Hyper-parameters
	input_size = 784  # features per sample given to the first linear layer
	hidden_size = 500  # units in the hidden layer
	num_classes = 10  # scores produced by the output layer
	num_epochs = 10  # full passes over the training loader
	batch_size = 100  # samples per mini-batch in both loaders
	learning_rate = 0.001  # learning rate passed to the optimizer

	# MNIST dataset: the training split is downloaded into "data" when missing;
	# every image is converted to a tensor on access.
	train_dataset = torchvision.datasets.MNIST(
	    root="data",
	    train=True,
	    transform=transforms.ToTensor(),
	    download=True,
	)

	test_dataset = torchvision.datasets.MNIST(
	    root="data",
	    train=False,
	    transform=transforms.ToTensor(),
	)

	# Data loaders: only the training batches are shuffled
	train_loader = torch.utils.data.DataLoader(
	    dataset=train_dataset,
	    batch_size=batch_size,
	    shuffle=True,
	)

	test_loader = torch.utils.data.DataLoader(
	    dataset=test_dataset,
	    batch_size=batch_size,
	    shuffle=False,
	)

	# Fully connected neural network with one hidden layer
	class NeuralNet(nn.Module):
	    """Feed-forward network: linear layer, ReLU, then a second linear layer.

	    Args:
	        input_size: Number of features in each input row.
	        hidden_size: Number of units in the hidden layer.
	        num_classes: Number of scores produced for each input row.
	    """

	    def __init__(self, input_size, hidden_size, num_classes):
	        super().__init__()
	        self.fc1 = nn.Linear(input_size, hidden_size)
	        self.relu = nn.ReLU()
	        self.fc2 = nn.Linear(hidden_size, num_classes)

	    def forward(self, x):
	        """Return the output of the second linear layer for ``x``."""
	        out = self.fc1(x)
	        out = self.relu(out)
	        out = self.fc2(out)
	        return out


	# Build the network and move its parameters to the selected device
	model = NeuralNet(input_size, hidden_size, num_classes).to(device)

	# Loss and optimizer
	criterion = nn.CrossEntropyLoss()
	optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

	# Train the model
	total_step = len(train_loader)

	# perf_counter() is monotonic, so the measured duration cannot be skewed by
	# system clock adjustments the way a difference of time.time() values can.
	start_time = time.perf_counter()

	for epoch in range(num_epochs):
	    for i, (images, labels) in enumerate(train_loader):
	        # Flatten every image to input_size values (the width of the first
	        # layer) and move the batch to the configured device
	        images = images.reshape(-1, input_size).to(device)
	        labels = labels.to(device)

	        # Forward pass
	        outputs = model(images)
	        loss = criterion(outputs, labels)

	        # Backward and optimize
	        optimizer.zero_grad()
	        loss.backward()
	        optimizer.step()

	        # Report progress every 100 mini-batches
	        if (i + 1) % 100 == 0:
	            print(
	                f"Epoch [{epoch + 1}/{num_epochs}], "
	                f"Step [{i + 1}/{total_step}], Loss: {loss.item():.4f}"
	            )

	# CUDA kernels run asynchronously; wait for queued work to finish so the
	# elapsed time covers the whole training run.
	if device.type == "cuda":
	    torch.cuda.synchronize()

	end_time = time.perf_counter()

	print(f"Total execution time: {end_time - start_time:.2f} seconds")

	# Test the model
	# Switch to evaluation mode first. NeuralNet has no dropout or batch-norm
	# layers, so results are unchanged today, but evaluation stays correct if
	# such layers are added later.
	model.eval()

	# In test phase, we don't need to compute gradients (for memory efficiency)
	with torch.no_grad():
	    correct = 0
	    total = 0
	    for images, labels in test_loader:
	        images = images.reshape(-1, input_size).to(device)
	        labels = labels.to(device)
	        outputs = model(images)
	        # Predicted class is the index of the largest score in each row
	        predicted = outputs.argmax(dim=1)
	        total += labels.size(0)
	        correct += (predicted == labels).sum().item()

	# Report the number of images actually evaluated rather than a fixed count
	accuracy = 100 * correct / total
	print(f"Accuracy of the network on the {total} test images: {accuracy} %")

	# Save the model checkpoint
	torch.save(model.state_dict(), "model.ckpt")