Skip to content

Instantly share code, notes, and snippets.

@Erlemar
Created January 15, 2019 04:16
Show Gist options
  • Save Erlemar/880c6366816374012554e3167459f30d to your computer and use it in GitHub Desktop.
Save Erlemar/880c6366816374012554e3167459f30d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import copy\n",
"import glob\n",
"import json\n",
"import os\n",
"import time\n",
"\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"from PIL import Image\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
"from torch.optim.lr_scheduler import CosineAnnealingLR\n",
"from torch.utils.data import DataLoader, Dataset\n",
"from torch.utils.data.sampler import SubsetRandomSampler\n",
"\n",
"train_on_gpu = True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class DigitDataset(Dataset):\n",
"    \"\"\"In-memory dataset of cropped digit images plus an oversampled non-digit class.\n",
"\n",
"    Images are read from the ``digit_*`` sub-folders of ``datafolder`` (the label is\n",
"    parsed from the file name) and from its ``other1`` sub-folder (label 10).\n",
"\n",
"    Args:\n",
"        datafolder: Root folder containing the per-class sub-folders.\n",
"        transform: Callable applied to each stored PIL image in ``__getitem__``.\n",
"        n: Number of times every ``other1`` image is repeated (oversampling).\n",
"\n",
"    Raises:\n",
"        ValueError: If ``n`` is smaller than 1.\n",
"    \"\"\"\n",
"\n",
"    OTHER_FOLDER = 'other1'\n",
"    OTHER_LABEL = 10\n",
"\n",
"    def __init__(self, datafolder='digits_dataset/', transform=transforms.Compose([transforms.ToTensor()]), n=3):\n",
"        if n < 1:\n",
"            raise ValueError(f'n must be a positive integer, got {n!r}')\n",
"        self.datafolder = datafolder\n",
"        self.transform = transform\n",
"        # Must be stored before loading: _load_images_data() reads it.\n",
"        self.n = n\n",
"        self.image_files_list = []\n",
"        self.labels = []\n",
"        # Per-sample weights returned by __getitem__. They are expected to be\n",
"        # assigned by the caller after construction; until then 1.0 is returned.\n",
"        self.weights = None\n",
"        self._load_images_data()\n",
"\n",
"    def _load_images_data(self):\n",
"        \"\"\"Read, crop and label every image, filling image_files_list and labels.\n",
"\n",
"        Digit images get the label taken from their file name: the last character\n",
"        before the first '__'. Images in the ``other1`` folder get label 10 and are\n",
"        appended ``self.n`` times, because there are few of them (oversampling).\n",
"        Folders and files are visited in sorted order so the sample order is\n",
"        reproducible between runs.\n",
"        \"\"\"\n",
"        digit_folders = sorted(f for f in os.listdir(self.datafolder) if 'digit_' in f)\n",
"        for folder in digit_folders + [self.OTHER_FOLDER]:\n",
"            is_other = folder == self.OTHER_FOLDER\n",
"            pattern = os.path.join(self.datafolder, folder, '*.jpg')\n",
"            for pic in sorted(glob.glob(pattern)):\n",
"                if is_other:\n",
"                    label, copies = self.OTHER_LABEL, self.n\n",
"                else:\n",
"                    # basename() works with both '/' and '\\\\' separators, unlike\n",
"                    # splitting the path on a hard-coded backslash.\n",
"                    label = int(os.path.basename(pic).split('__')[0][-1])\n",
"                    copies = 1\n",
"\n",
"                # convert() returns an independent image, so the file handle can\n",
"                # be released right away.\n",
"                with Image.open(pic) as raw:\n",
"                    img = raw.convert('RGB')\n",
"                # getbbox() bounds the non-zero pixels; inverting first turns pure\n",
"                # white into zero (assumes the images have a white background).\n",
"                bbox = Image.eval(img, lambda px: 255 - px).getbbox()\n",
"                cropped = img.crop(bbox)\n",
"\n",
"                # Images and labels are extended together so they cannot get\n",
"                # out of step.\n",
"                self.image_files_list.extend([cropped] * copies)\n",
"                self.labels.extend([label] * copies)\n",
"\n",
"    def __len__(self):\n",
"        \"\"\"Return the number of stored samples (oversampled copies included).\"\"\"\n",
"        return len(self.image_files_list)\n",
"\n",
"    def __getitem__(self, idx):\n",
"        \"\"\"Return ``(transformed image, label, weight)`` for sample ``idx``.\"\"\"\n",
"        image = self.transform(self.image_files_list[idx])\n",
"        label = self.labels[idx]\n",
"        weight = 1.0 if self.weights is None else self.weights[idx]\n",
"\n",
"        return image, label, weight"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Training pipeline: resize to 32x32, apply light random augmentation, convert to\n",
"# a tensor and normalize every channel with mean 0.5 and std 0.5.\n",
"# NOTE(review): a horizontal flip mirrors digits - confirm this is intended.\n",
"train_transforms = transforms.Compose([\n",
"    transforms.Resize(size=(32, 32)),\n",
"    transforms.RandomHorizontalFlip(p=0.2),\n",
"    transforms.RandomRotation(degrees=(-15, 15)),\n",
"    transforms.ToTensor(),\n",
"    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),\n",
"])\n",
"\n",
"# Evaluation pipeline: the same resize and normalization, without randomness.\n",
"test_transforms = transforms.Compose([\n",
"    transforms.Resize(size=(32, 32)),\n",
"    transforms.ToTensor(),\n",
"    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load all images into memory; the training transform is applied on item access.\n",
"dataset = DigitDataset(\n",
"    transform=train_transforms,\n",
"    datafolder='digits_dataset/',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Labels need to be one-hot encoded (11 classes: digits 0-9 plus 10 for non-digits).\n",
"raw_labels = np.asarray(dataset.labels)\n",
"if raw_labels.ndim == 2:\n",
"    # The cell was run before and the labels are already one-hot: recover the ids\n",
"    # so that re-running does not encode the encoded matrix again.\n",
"    raw_labels = raw_labels.argmax(1)\n",
"\n",
"# The encoder is left at its default (sparse) output and densified with .toarray(),\n",
"# because the keyword that requests dense output was renamed between scikit-learn\n",
"# releases (`sparse` became `sparse_output`).\n",
"onehot_encoder = OneHotEncoder()\n",
"onehot_encoder.fit(np.arange(11).reshape(-1, 1))\n",
"ohe_labels = onehot_encoder.transform(raw_labels.reshape(-1, 1)).toarray()\n",
"dataset.labels = ohe_labels"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Assign every image a weight inversely proportional to the size of its class:\n",
"# total number of samples / number of samples in that class.\n",
"# The weight is looked up per sample, so it stays aligned with dataset.labels no\n",
"# matter in which order the classes were loaded.\n",
"class_ids = dataset.labels.argmax(1)\n",
"class_counts = np.bincount(class_ids)\n",
"weights = (len(class_ids) / class_counts[class_ids]).tolist()\n",
"\n",
"dataset.weights = weights"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# splitting data for validation\n",
"tr, val = train_test_split(range(len(dataset.labels)), stratify=dataset.labels, test_size=0.1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"batch_size = 128\n",
"num_workers = 0\n",
"\n",
"# Each sampler draws (in random order) only from its own subset of indices.\n",
"train_sampler = SubsetRandomSampler(list(tr))\n",
"valid_sampler = SubsetRandomSampler(list(val))\n",
"\n",
"# Validation must not see the random training augmentations. A shallow copy shares\n",
"# the images, labels and weights that `dataset` holds at this point and only swaps\n",
"# the transform for the deterministic one.\n",
"valid_dataset = copy.copy(dataset)\n",
"valid_dataset.transform = test_transforms\n",
"\n",
"train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers)\n",
"valid_loader = DataLoader(valid_dataset, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
"class Net(nn.Module):\n",
"    \"\"\"Small CNN mapping 3x32x32 images to 11 raw class scores (logits).\n",
"\n",
"    Feature map sizes for a 32x32 input: conv1 -> 8x30x30, pool -> 8x15x15,\n",
"    conv2 -> 16x13x13, pool -> 16x6x6, i.e. 576 features for the first linear layer.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self):\n",
"        super().__init__()\n",
"        self.conv1 = nn.Conv2d(3, 8, 3)\n",
"        self.pool = nn.MaxPool2d(2, 2)\n",
"        self.conv2 = nn.Conv2d(8, 16, 3)\n",
"        self.fc1 = nn.Linear(576, 128)\n",
"        self.fc2 = nn.Linear(128, 11)\n",
"        self.dropout = nn.Dropout(0.1)\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Return logits of shape (batch, 11) for an input of shape (batch, 3, 32, 32).\"\"\"\n",
"        x = self.pool(F.relu(self.conv1(x)))\n",
"        x = self.pool(F.relu(self.conv2(x)))\n",
"        # Flatten per sample instead of view(-1, 576): a wrong input size then fails\n",
"        # in fc1 rather than being silently folded into the batch dimension.\n",
"        x = torch.flatten(x, start_dim=1)\n",
"        x = self.dropout(F.relu(self.fc1(x)))\n",
"        return self.fc2(x)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use the GPU only when it is requested via train_on_gpu and actually available;\n",
"# otherwise fall back to the CPU instead of crashing in .cuda().\n",
"device = torch.device('cuda' if train_on_gpu and torch.cuda.is_available() else 'cpu')\n",
"\n",
"model_conv = Net()\n",
"model_conv.to(device)\n",
"# Applied to the raw model outputs (logits) and the one-hot encoded targets.\n",
"criterion = nn.BCEWithLogitsLoss()\n",
"\n",
"optimizer = optim.SGD(model_conv.parameters(), lr=0.1, momentum=0.85)\n",
"model_scheduler = CosineAnnealingLR(optimizer, T_max=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"valid_loss_hist = []\n",
"train_loss_hist = []\n",
"\n",
"# for manual early stopping\n",
"valid_loss_min = np.inf\n",
"best_epoch = 0\n",
"patience = 15\n",
"# current number of consecutive epochs in which validation loss didn't improve\n",
"p = 0\n",
"# whether training was stopped early\n",
"stop = False\n",
"\n",
"n_epochs = 100\n",
"train_accuracy = []\n",
"valid_accuracy = []\n",
"\n",
"# Batches are moved to whatever device the model already lives on.\n",
"device = next(model_conv.parameters()).device\n",
"\n",
"\n",
"def batch_accuracy(output, target):\n",
"    \"\"\"Return the share of samples whose highest logit matches the one-hot target.\"\"\"\n",
"    return (output.argmax(1) == target.argmax(1)).float().mean().item()\n",
"\n",
"\n",
"for epoch in range(1, n_epochs + 1):\n",
"    print(time.ctime(), 'Epoch:', epoch)\n",
"\n",
"    # eval() is switched on for validation below, so training mode (dropout active)\n",
"    # has to be restored at the start of every epoch.\n",
"    model_conv.train()\n",
"    train_loss = []\n",
"    train_acc = []\n",
"\n",
"    for data, target, weight in train_loader:\n",
"        data, target, weight = data.to(device), target.to(device), weight.to(device)\n",
"\n",
"        optimizer.zero_grad()\n",
"        output = model_conv(data)\n",
"        # One weight per sample, shaped (batch, 1) so it broadcasts over the classes.\n",
"        criterion.weight = weight.view(-1, 1).double()\n",
"        loss = criterion(output.double(), target.double())\n",
"        loss.backward()\n",
"        optimizer.step()\n",
"\n",
"        train_loss.append(loss.item())\n",
"        train_acc.append(batch_accuracy(output, target))\n",
"\n",
"    model_conv.eval()\n",
"    val_loss = []\n",
"    val_acc = []\n",
"    # No gradients are needed for validation; skipping them saves memory and time.\n",
"    with torch.no_grad():\n",
"        for data, target, weight in valid_loader:\n",
"            data, target, weight = data.to(device), target.to(device), weight.to(device)\n",
"            output = model_conv(data)\n",
"            criterion.weight = weight.view(-1, 1).double()\n",
"            loss = criterion(output.double(), target.double())\n",
"\n",
"            val_loss.append(loss.item())\n",
"            val_acc.append(batch_accuracy(output, target))\n",
"\n",
"    epoch_train_loss = np.mean(train_loss)\n",
"    valid_loss = np.mean(val_loss)\n",
"    print(f'Epoch {epoch}, train loss: {epoch_train_loss:.4f}, valid loss: {valid_loss:.4f}, '\n",
"          f'train acc: {np.mean(train_acc):.4f}, valid acc: {np.mean(val_acc):.4f}')\n",
"    train_accuracy.append(np.mean(train_acc))\n",
"    valid_accuracy.append(np.mean(val_acc))\n",
"    valid_loss_hist.append(valid_loss)\n",
"    train_loss_hist.append(epoch_train_loss)\n",
"\n",
"    if valid_loss <= valid_loss_min:\n",
"        print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(\n",
"            valid_loss_min, valid_loss))\n",
"        torch.save(model_conv.state_dict(), 'model.pt')\n",
"        valid_loss_min = valid_loss\n",
"        p = 0\n",
"        best_epoch = epoch\n",
"    else:\n",
"        # validation loss didn't improve\n",
"        p += 1\n",
"        print(f'{p} epochs of increasing val loss')\n",
"        if p > patience:\n",
"            print('Stopping training')\n",
"            stop = True\n",
"            break\n",
"\n",
"    # The cosine schedule advances once per epoch and takes no metric; a value\n",
"    # passed to step() would be interpreted as an epoch index.\n",
"    model_scheduler.step()\n",
"\n",
"print(f'Best train_accuracy: {max(train_accuracy) * 100:.4f}%. '\n",
"      f'Best valid_accuracy: {max(valid_accuracy) * 100:.4f}%. '\n",
"      f'Loss: {valid_loss_min:.4f}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Bundle the model and optimizer state after the last executed epoch into a single\n",
"# checkpoint.\n",
"# NOTE(review): this writes to 'model.pt', the same file the training loop uses for\n",
"# the best weights, so that file is replaced - confirm this is intended.\n",
"states = dict(\n",
"    epoch=epoch + 1,\n",
"    state_dict=model_conv.state_dict(),\n",
"    optimizer=optimizer.state_dict(),\n",
")\n",
"\n",
"torch.save(states, 'model.pt')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Persist the per-epoch training history and the best epoch as JSON.\n",
"stats_dict = dict(\n",
"    train_acc=train_accuracy,\n",
"    valid_acc=valid_accuracy,\n",
"    train_loss=train_loss_hist,\n",
"    valid_loss=valid_loss_hist,\n",
"    best_epoch=best_epoch,\n",
")\n",
"\n",
"with open('stats.json', 'w') as stats_file:\n",
"    json.dump(stats_dict, stats_file)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment