Created
July 15, 2016 06:21
-
-
Save jacobandreas/897987ac03f8d4b9ea4b9e44affa00e7 to your computer and use it in GitHub Desktop.
Precompute VGG feature representations for VQA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import caffe | |
from collections import defaultdict | |
import numpy as np | |
import os | |
import sys | |
import matplotlib.image | |
caffe.set_device(7) | |
caffe.set_mode_gpu() | |
split = "train2014" | |
IMAGE_ROOT = "/x/jda/vqa/Images/" + split + "/raw" | |
RAW_ROOT = IMAGE_ROOT | |
IMAGE_CONV_DEST = "/x/jda/vqa/Images/" + split + "/conv" | |
IMAGE_FC_DEST = "/x/jda/vqa/Images/" + split + "/fc" | |
BATCH_SIZE = 32 | |
VGG_FINE_TUNED = "/x/rohrbach/lrcn_finetune_vgg_trainval_iter_100000.caffemodel" | |
net = caffe.Net("/home/jda/vggnet/VGG_ILSVRC_16_layers_deploy.prototxt", | |
VGG_FINE_TUNED, | |
caffe.TEST) | |
print net.blobs.keys() | |
all_image_names = os.listdir(RAW_ROOT) | |
all_image_names = [n for n in all_image_names if n[-3:] == "jpg"] | |
image_names_by_size = defaultdict(list) | |
for n in all_image_names: | |
full_name = os.path.join(RAW_ROOT, n) | |
try: | |
image = caffe.io.load_image(full_name) | |
except Exception as e: | |
print >>sys.stderr, "unable to load image " + full_name | |
continue | |
width, height = image.shape[:2] | |
image_names_by_size[width, height].append(n) | |
total_count = 0 | |
for size, names in image_names_by_size.items(): | |
n_images = len(names) | |
print ">", size, n_images | |
i = 0 | |
while i < n_images: | |
real_count = BATCH_SIZE if i + BATCH_SIZE < n_images else n_images - i | |
names_here = [os.path.join(RAW_ROOT, names[i+j]) for j in range(real_count)] | |
images = [caffe.io.load_image(name) for name in names_here] | |
#images = [caffe.io.resize_image(image, (224,224)) for image in images] | |
images = [caffe.io.resize_image(image, (448,448)) for image in images] | |
net.blobs["data"].reshape(real_count, 3, images[0].shape[0], images[0].shape[1]) | |
transformer = caffe.io.Transformer({'data': net.blobs["data"].data.shape}) | |
transformer.set_transpose("data", (2, 0, 1)) | |
transformer.set_mean("data", np.load("caffe/imagenet/ilsvrc_2012_mean.npy").mean(1).mean(1)) | |
transformer.set_raw_scale("data", 255) | |
transformer.set_channel_swap("data", (2,1,0)) | |
proc_images = [transformer.preprocess("data", image) for image in images] | |
fw = net.forward(data=np.asarray(proc_images)) | |
embeddings = net.blobs["pool5"].data.copy() | |
print ">>", embeddings.shape | |
for j in range(real_count): | |
print IMAGE_CONV_DEST + "/" + names[i+j] | |
np.savez(IMAGE_CONV_DEST + "/" + names[i+j], embeddings[j,:,:,:]) | |
print total_count, i | |
total_count += real_count | |
i += BATCH_SIZE |
Hi.
It seems that you used something different from default VGG prototxt file and model, where can we find them?
Thanks.
You can use a standard VGG model instead of the fine-tuned one we're using here. You should modify the prototxt by chopping everything off after the last convolutional layer.
@omidb Same problem here. How did you solve it?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for the script. Can I substitute
lrcn_finetune_vgg_trainval_iter_100000.caffemodel
with the original vgg16 model?Using original vgg16 model, I get following error (not sure if it's related to that model):