- Deep networks suffer from the vanishing gradient problem: gradients are large near the loss layer, but as they propagate backward through earlier layers their magnitude diminishes.
- Inception (GoogLeNet) tackles this by using auxiliary losses (see fig.): the outputs of inception modules 4a and 4d are branched into small classification heads, each ending in a standard softmax that predicts the same classes as the main task.
- Hence during training, loss:0 provides a strong gradient signal that adjusts layer 4a (and the preceding layers) more aggressively. Similarly, layers 4b, 4c, and 4d receive strong gradients from loss:1, while layers from 4e onward sit close to the main classifier, loss:2. A sketch of how the three losses combine follows this list.
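Below is a minimal PyTorch-style sketch of this idea. The `AuxHead` module and the `backbone` returning intermediate feature maps are assumptions for illustration, not GoogLeNet's exact implementation; the 0.3 weighting on the auxiliary losses follows the original paper, and the auxiliary heads are discarded at inference time.

```python
import torch
import torch.nn as nn

class AuxHead(nn.Module):
    """Small classification head attached to an intermediate feature map
    (e.g. the outputs of inception modules 4a and 4d)."""
    def __init__(self, in_channels: int, num_classes: int):
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(4)          # pool spatial dims to 4x4
        self.conv = nn.Conv2d(in_channels, 128, 1)   # 1x1 conv to reduce channels
        self.fc1 = nn.Linear(128 * 4 * 4, 1024)
        self.fc2 = nn.Linear(1024, num_classes)      # same classes as the main head
        self.dropout = nn.Dropout(0.7)

    def forward(self, x):
        x = self.conv(self.pool(x))
        x = torch.flatten(x, 1)
        x = self.dropout(torch.relu(self.fc1(x)))
        return self.fc2(x)

criterion = nn.CrossEntropyLoss()

def training_loss(backbone, aux0, aux1, images, labels):
    # `backbone` is a hypothetical model returning
    # (features after 4a, features after 4d, main logits).
    feat_4a, feat_4d, main_logits = backbone(images)
    loss2 = criterion(main_logits, labels)       # loss:2, main classifier
    loss0 = criterion(aux0(feat_4a), labels)     # loss:0, head on 4a output
    loss1 = criterion(aux1(feat_4d), labels)     # loss:1, head on 4d output
    # Auxiliary losses are down-weighted (0.3 in the GoogLeNet paper), so the
    # earlier layers still get a strong, direct gradient signal during training.
    return loss2 + 0.3 * (loss0 + loss1)
```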