import torch
import numpy as np
import argparse
from torchvision import models
import matplotlib.pyplot as plt
import cv2
import os
# Module-level pretrained ResNet-50. ModelOutputs.__call__ reads its `fc`
# layer to turn the flattened features into class scores, so this instance is
# loaded at import time and is separate from the model built under __main__.
resnet = models.resnet50(pretrained=True)
class FeatureExtractor():
    """Run a model child-by-child and capture target activations/gradients.

    The model's direct child modules are applied one after another in
    registration order. For every child whose name is listed in
    ``target_layers`` the activation is collected and a tensor hook is
    registered so that the gradient flowing into that activation during a
    later backward pass is stored in ``self.gradients``.
    """

    def __init__(self, model, target_layers):
        """Store the model and the names of the child modules to capture.

        Args:
            model: Object exposing ``_modules`` (an ordered name -> module
                mapping); each module is called as ``module(x)``.
            target_layers: Container of child-module names to capture.
        """
        self.model = model
        self.target_layers = target_layers
        self.gradients = []

    def save_gradient(self, grad):
        """Tensor-hook callback: remember the gradient of a target activation.

        Args:
            grad: Gradient tensor passed in by autograd.
        """
        self.gradients.append(grad)

    def __call__(self, x):
        """Forward ``x`` through every child module of the model.

        Args:
            x: Input tensor for the first child module.

        Returns:
            Tuple ``(outputs, x)`` where ``outputs`` is the list of target
            activations in forward order and ``x`` is the output of the last
            child module.
        """
        outputs = []
        # Gradients from a previous forward/backward pass are discarded.
        self.gradients = []
        for name, module in self.model._modules.items():
            x = module(x)
            print('name=', name)
            print('x.size()=', x.size())
            if name in self.target_layers:
                # Hooks can only be attached to tensors that take part in
                # autograd; skip registration otherwise (e.g. under no_grad)
                # instead of raising.
                if x.requires_grad:
                    # The hook fires during backward, so with several target
                    # layers the gradients are appended in backward order.
                    x.register_hook(self.save_gradient)
                outputs += [x]
                print('outputs.size()=', x.size())
        return outputs, x
class ModelOutputs():
    """Make a forward pass and expose what Grad-CAM needs.

    A call returns:
    1. The activations of the targeted intermediate layers.
    2. The network output (class scores).
    Gradients recorded for the targeted layers are available through
    ``get_gradients()``.
    """

    def __init__(self, model, target_layers, use_cuda):
        """Wrap ``model`` in a FeatureExtractor for ``target_layers``.

        Args:
            model: Model whose child modules are run sequentially.
            target_layers: Names of the child modules to capture.
            use_cuda: When true, the final scores are returned on the GPU.
        """
        self.model = model
        self.feature_extractor = FeatureExtractor(self.model, target_layers)
        self.cuda = use_cuda

    def get_gradients(self):
        """Return the gradient list held by the feature extractor."""
        return self.feature_extractor.gradients

    def __call__(self, x):
        """Run the feature extractor and the classification layer.

        Args:
            x: Input tensor for the wrapped model.

        Returns:
            Tuple ``(target_activations, output)``; ``output`` is the result
            of the module-level ``resnet.fc`` applied to the flattened
            features.
        """
        target_activations, output = self.feature_extractor(x)
        # Flatten everything but the batch axis before the linear layer.
        output = output.view(output.size(0), -1)
        if self.cuda:
            # The module-level `resnet` is not moved to the GPU anywhere in
            # the visible code, so run fc on the CPU and move the scores back.
            output = resnet.fc(output.cpu()).cuda()
        else:
            # fc must be applied exactly once: its output width differs from
            # its input width, so a second application would fail.
            output = resnet.fc(output)
        return target_activations, output
def preprocess_image(img):
    """Normalise an image array and convert it to a channel-first tensor.

    The last axis of ``img`` is reversed (the caller in this file passes an
    image loaded with ``cv2.imread``, i.e. the channel order is flipped here),
    each channel is normalised with the fixed per-channel mean / std below,
    and the result is transposed from (H, W, 3) to (3, H, W).

    Args:
        img: Array of shape (H, W, 3). Any numeric dtype is accepted; values
            are converted to float32. The caller in this file scales pixel
            values by 1/255 beforehand.

    Returns:
        A float32 ``torch.Tensor`` of shape (3, H, W). No batch axis is
        added. ``img`` itself is not modified.
    """
    means = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    stds = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    # Explicit float32 conversion keeps integer / float64 inputs from being
    # truncated or rejected further down.
    flipped = np.asarray(img, dtype=np.float32)[:, :, ::-1]
    # Broadcasting over the last axis normalises all three channels at once
    # and allocates a new array, leaving the caller's image untouched.
    normalized = (flipped - means) / stds
    # from_numpy needs positive strides, hence the contiguous copy.
    chw = np.ascontiguousarray(np.transpose(normalized, (2, 0, 1)))
    return torch.from_numpy(chw)
def show_cam_on_image(img, mask):
    """Blend a colour-mapped mask over an image and return it as uint8.

    Args:
        img: Image array; converted to float32 before blending. The caller in
            this file passes a float32 image scaled by 1/255.
        mask: Array scaled by 255 and cast to uint8 before colour mapping
            (presumably values in [0, 1] -- confirm against the caller).

    Returns:
        uint8 array: 30% JET-coloured heatmap plus 70% image, rescaled so the
        maximum value maps to 255.
    """
    mask_u8 = np.uint8(255 * mask)
    colored = cv2.applyColorMap(mask_u8, cv2.COLORMAP_JET)
    colored = np.float32(colored) / 255

    # Fixed 0.3 / 0.7 weighting of heatmap and image.
    blended = colored * 0.3 + np.float32(img) * 0.7
    blended = blended / np.max(blended)
    return np.uint8(255 * blended)
class GradCam:
    """Compute a Grad-CAM heatmap for one image and one class.

    The class score is differentiated with respect to the last targeted
    activation; the spatially averaged gradients weight the activation
    channels, and the weighted sum is rectified, resized and normalised.
    """

    def __init__(self, model, target_layer_names, use_cuda):
        """Prepare the model and the activation extractor.

        Args:
            model: Model whose child modules are run sequentially by the
                extractor.
            target_layer_names: Names of the child modules to capture; the
                last captured one is used for the heatmap.
            use_cuda: When true, the model and inputs are moved to the GPU.
        """
        self.model = model
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        # Inference mode: otherwise batch-norm layers would normalise with the
        # statistics of the single input image and update their running stats.
        self.model.eval()
        self.extractor = ModelOutputs(self.model, target_layer_names, use_cuda)

    def forward(self, input):
        """Run the wrapped model directly on ``input``."""
        return self.model(input)

    def __call__(self, input, index=None):
        """Return the class-activation map for ``input``.

        Args:
            input: Image tensor, either (C, H, W) or (N, C, H, W). Only the
                first sample of a batch is used.
            index: Class index to explain; defaults to the highest-scoring
                class of the first sample.

        Returns:
            float32 numpy array with the spatial size of ``input``; values are
            scaled to [0, 1], or all zeros when the map is flat.
        """
        # The indexing below assumes a batch axis; add one for CHW input.
        if input.dim() == 3:
            input = input.unsqueeze(0)
        if self.cuda:
            input = input.cuda()
        # Detach so the caller's tensor is not modified, then make sure the
        # activations are part of an autograd graph even for frozen models.
        input = input.detach().requires_grad_(True)

        features, output = self.extractor(input)

        if index is None:
            index = int(torch.argmax(output[0]))

        # Select the score of the requested class; zeros_like keeps the mask
        # on the same device as the scores, so no CPU/GPU branching is needed.
        one_hot = torch.zeros_like(output)
        one_hot[0, index] = 1
        one_hot = torch.sum(one_hot * output)
        print("my one_hot out:", one_hot)

        target = features[-1]
        # Differentiate the score w.r.t. the target activation directly; this
        # does not depend on hooks and leaves parameter .grad fields alone.
        grads_val = torch.autograd.grad(one_hot, target)[0].cpu().numpy()
        print('grads_val', grads_val.shape)

        target = target.detach().cpu().numpy()[0, :]
        # Channel weights: gradient averaged over the two spatial axes.
        weights = np.mean(grads_val, axis=(2, 3))[0, :]
        print('weights', weights.shape)

        # Weighted sum over channels: (C,) . (C, h, w) -> (h, w).
        cam = np.tensordot(weights, target, axes=1).astype(np.float32)
        print('cam', cam.shape)
        print('features', features[-1].shape)
        print('target', target.shape)

        cam = np.maximum(cam, 0)
        # cv2.resize takes (width, height); follow the input's spatial size.
        height, width = input.shape[-2:]
        cam = cv2.resize(cam, (int(width), int(height)))
        cam = cam - np.min(cam)
        peak = np.max(cam)
        # A flat map has peak 0 after the shift; skip the division then.
        if peak > 0:
            cam = cam / peak
        return cam
def get_args(argv=None):
    """Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings. When omitted, argparse reads
            ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        argparse.Namespace with ``use_cuda`` (bool, true only if requested
        and CUDA is available) and ``image_path`` (str).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--use-cuda', action='store_true', default=False,
                        help='Use NVIDIA GPU acceleration')
    parser.add_argument('--image-path', type=str, default='./examples/',
                        help='Input image path')
    args = parser.parse_args(argv)
    # Silently fall back to the CPU when no CUDA device is available.
    args.use_cuda = args.use_cuda and torch.cuda.is_available()
    return args
if __name__ == '__main__':
    # NOTE(review): get_args() is defined in this file but not used here; the
    # image location and the CPU/GPU choice below are hard-coded.
    model = models.resnet50(pretrained=True)
    # The feature extractor applies the model's child modules one after
    # another; fc is removed because ModelOutputs flattens the features and
    # applies the classification layer itself.
    del model.fc
    grad_cam = GradCam(model, target_layer_names=["layer4"], use_cuda=False)

    image_path = '/data00/home/kongtao/data00/home/kongtao/workspace/images'
    image_name = 'cat1.jpeg'
    image_file = os.path.join(image_path, image_name)
    image = cv2.imread(image_file)
    # cv2.imread signals an unreadable file by returning None; fail with a
    # clear message instead of a cryptic error inside cv2.resize.
    if image is None:
        raise FileNotFoundError('Could not read image: ' + image_file)
    image = np.float32(cv2.resize(image, (224, 224))) / 255
    input_tensor = preprocess_image(image)

    # None selects the highest-scoring class.
    target_index = None
    mask = grad_cam(input_tensor, target_index)
    # NOTE(review): vis_map is computed but neither saved nor displayed.
    vis_map = show_cam_on_image(image, mask)
