############################################################ |
############################################################ |
# Net name for this two-branch (siamese) definition.
name: "ScenesSiamese"
############################################################ |
############################################################ |
# Train data layers |
############################################################ |
# TRAIN-phase image input for the first branch: produces "image" and
# "label_image" in batches of 100 from the listing file below.
# NOTE(review): no `shuffle` is set, so the listing is read in file order;
# presumably the image, image_p and HDF5 listings are aligned row by row --
# confirm before enabling shuffling on any of them.
layer {
  name: "data_image"
  type: "ImageData"
  top: "image"
  top: "label_image"
  include { phase: TRAIN }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image.txt"
    batch_size: 100
  }
  transform_param {
    mirror: true  # random horizontal flip as training augmentation
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
# TRAIN-phase image input for the paired ("_p") branch: produces "image_p" and
# "label_image_p" in batches of 100.
# NOTE(review): batch_size matches the other TRAIN data layers (100); keep them
# equal, and presumably keep the listing files row-aligned -- confirm.
layer {
  name: "data_image_p"
  type: "ImageData"
  top: "image_p"
  top: "label_image_p"
  include { phase: TRAIN }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image_p.txt"
    batch_size: 100
  }
  transform_param {
    mirror: true  # random horizontal flip as training augmentation
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
# TRAIN-phase HDF5 input carrying the non-image features for both branches plus
# the pair label. "time"/"histograms" (and their "_p" twins) are concatenated
# with the fc8 outputs, "label" feeds the contrastive loss, and the "shot_id"
# tops are only consumed by Silence layers.
# NOTE(review): the source is a text file listing HDF5 files; each listed file
# presumably contains one dataset per top name -- confirm.
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  top: "time"
  top: "time_p"
  top: "shot_id"
  top: "shot_id_p"
  top: "histograms"
  top: "histograms_p"
  top: "label"
  include { phase: TRAIN }
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_h5.txt"
    batch_size: 100
  }
}
############################################################ |
# Test data layers |
############################################################ |
# TEST-phase image input for the first branch: produces "image" and
# "label_image" in batches of 100 from the test listing file.
layer {
  name: "data_image"
  type: "ImageData"
  top: "image"
  top: "label_image"
  include { phase: TEST }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image.txt"
    batch_size: 100
  }
  transform_param {
    # Mirroring is a random augmentation; it is disabled here so evaluation is
    # deterministic (it was `true`, which randomly flipped test images).
    mirror: false
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
# TEST-phase image input for the paired ("_p") branch: produces "image_p" and
# "label_image_p" in batches of 100 from the test listing file.
layer {
  name: "data_image_p"
  type: "ImageData"
  top: "image_p"
  top: "label_image_p"
  include { phase: TEST }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image_p.txt"
    batch_size: 100
  }
  transform_param {
    # Mirroring is a random augmentation; it is disabled here so evaluation is
    # deterministic (it was `true`, which randomly flipped test images).
    mirror: false
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
# TEST-phase HDF5 input: same seven tops as the TRAIN-phase layer of the same
# name, read from the test listing file in batches of 100.
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  top: "time"
  top: "time_p"
  top: "shot_id"
  top: "shot_id_p"
  top: "histograms"
  top: "histograms_p"
  top: "label"
  include { phase: TEST }
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_h5.txt"
    batch_size: 100
  }
}
############################################################ |
############################################################ |
# First-branch stage 1: 11x11/4 convolution (96 filters) -> in-place ReLU ->
# 3x3/2 max pooling -> LRN. The named params ("conv1_w", "conv1_b") let another
# layer using the same names share these weights.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "image"
  top: "conv1"
  # Weights: base learning rate, weight decay applied.
  param {
    name: "conv1_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Biases: 2x learning rate, no weight decay.
  param {
    name: "conv1_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm1"
  type: "LRN"
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# First-branch stage 2: 5x5 grouped convolution (256 filters, pad 2, 2 groups)
# -> in-place ReLU -> 3x3/2 max pooling -> LRN.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "norm1"
  top: "conv2"
  # Weights: base learning rate, weight decay applied.
  param {
    name: "conv2_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Biases: 2x learning rate, no weight decay.
  param {
    name: "conv2_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# First-branch stage 3: 3x3 convolution (384 filters, pad 1, no grouping)
# followed by an in-place ReLU.
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "norm2"
  top: "conv3"
  # Weights: base learning rate, weight decay applied.
  param {
    name: "conv3_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Biases: 2x learning rate, no weight decay.
  param {
    name: "conv3_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
# First-branch stage 4: 3x3 grouped convolution (384 filters, pad 1, 2 groups)
# followed by an in-place ReLU.
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  # Weights: base learning rate, weight decay applied.
  param {
    name: "conv4_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Biases: 2x learning rate, no weight decay.
  param {
    name: "conv4_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
# First-branch stage 5: 3x3 grouped convolution (256 filters, pad 1, 2 groups)
# -> in-place ReLU -> 3x3/2 max pooling.
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  # Weights: base learning rate, weight decay applied.
  param {
    name: "conv5_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Biases: 2x learning rate, no weight decay.
  param {
    name: "conv5_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# First-branch fc6: 4096-unit fully connected layer on pool5, followed by an
# in-place ReLU and in-place dropout (ratio 0.5).
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  # Weights: base learning rate, weight decay applied.
  param {
    name: "fc6_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Biases: 2x learning rate, no weight decay.
  param {
    name: "fc6_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# First-branch fc7: 4096-unit fully connected layer on fc6, followed by an
# in-place ReLU and in-place dropout (ratio 0.5).
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  # Weights: base learning rate, weight decay applied.
  param {
    name: "fc7_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Biases: 2x learning rate, no weight decay.
  param {
    name: "fc7_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# First-branch fc8: 1183-unit fully connected layer on fc7 with an in-place
# ReLU. Its output is the image feature that the "merge" Concat layer combines
# with "time" and "histograms".
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  # Weights: base learning rate, weight decay applied.
  param {
    name: "fc8_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Biases: 2x learning rate, no weight decay.
  param {
    name: "fc8_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1183
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "fc8_relu"
  type: "ReLU"
  bottom: "fc8"
  top: "fc8"
}
############################################################ |
############################################################ |
# Paired-branch stage 1, mirroring conv1/relu1/pool1/norm1 on "image_p".
# The param names ("conv1_w", "conv1_b") are the same as in layer "conv1", which
# is how Caffe ties the two branches to a single set of weights.
layer {
  name: "conv1_p"
  type: "Convolution"
  bottom: "image_p"
  top: "conv1_p"
  # Shared weights: base learning rate, weight decay applied.
  param {
    name: "conv1_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Shared biases: 2x learning rate, no weight decay.
  param {
    name: "conv1_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu1_p"
  type: "ReLU"
  bottom: "conv1_p"
  top: "conv1_p"
}
layer {
  name: "pool1_p"
  type: "Pooling"
  bottom: "conv1_p"
  top: "pool1_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm1_p"
  type: "LRN"
  bottom: "pool1_p"
  top: "norm1_p"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Paired-branch stage 2, mirroring conv2/relu2/pool2/norm2. Weights are shared
# with layer "conv2" through the identical param names.
layer {
  name: "conv2_p"
  type: "Convolution"
  bottom: "norm1_p"
  top: "conv2_p"
  # Shared weights: base learning rate, weight decay applied.
  param {
    name: "conv2_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Shared biases: 2x learning rate, no weight decay.
  param {
    name: "conv2_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu2_p"
  type: "ReLU"
  bottom: "conv2_p"
  top: "conv2_p"
}
layer {
  name: "pool2_p"
  type: "Pooling"
  bottom: "conv2_p"
  top: "pool2_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm2_p"
  type: "LRN"
  bottom: "pool2_p"
  top: "norm2_p"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Paired-branch stage 3, mirroring conv3/relu3. Weights are shared with layer
# "conv3" through the identical param names.
layer {
  name: "conv3_p"
  type: "Convolution"
  bottom: "norm2_p"
  top: "conv3_p"
  # Shared weights: base learning rate, weight decay applied.
  param {
    name: "conv3_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Shared biases: 2x learning rate, no weight decay.
  param {
    name: "conv3_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu3_p"
  type: "ReLU"
  bottom: "conv3_p"
  top: "conv3_p"
}
# Paired-branch stage 4, mirroring conv4/relu4. Weights are shared with layer
# "conv4" through the identical param names.
layer {
  name: "conv4_p"
  type: "Convolution"
  bottom: "conv3_p"
  top: "conv4_p"
  # Shared weights: base learning rate, weight decay applied.
  param {
    name: "conv4_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Shared biases: 2x learning rate, no weight decay.
  param {
    name: "conv4_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu4_p"
  type: "ReLU"
  bottom: "conv4_p"
  top: "conv4_p"
}
# Paired-branch stage 5, mirroring conv5/relu5/pool5. Weights are shared with
# layer "conv5" through the identical param names.
layer {
  name: "conv5_p"
  type: "Convolution"
  bottom: "conv4_p"
  top: "conv5_p"
  # Shared weights: base learning rate, weight decay applied.
  param {
    name: "conv5_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Shared biases: 2x learning rate, no weight decay.
  param {
    name: "conv5_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu5_p"
  type: "ReLU"
  bottom: "conv5_p"
  top: "conv5_p"
}
layer {
  name: "pool5_p"
  type: "Pooling"
  bottom: "conv5_p"
  top: "pool5_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Paired-branch fc6, mirroring fc6/relu6/drop6. Weights are shared with layer
# "fc6" through the identical param names.
layer {
  name: "fc6_p"
  type: "InnerProduct"
  bottom: "pool5_p"
  top: "fc6_p"
  # Shared weights: base learning rate, weight decay applied.
  param {
    name: "fc6_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Shared biases: 2x learning rate, no weight decay.
  param {
    name: "fc6_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu6_p"
  type: "ReLU"
  bottom: "fc6_p"
  top: "fc6_p"
}
layer {
  name: "drop6_p"
  type: "Dropout"
  bottom: "fc6_p"
  top: "fc6_p"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Paired-branch fc7, mirroring fc7/relu7/drop7. Weights are shared with layer
# "fc7" through the identical param names.
layer {
  name: "fc7_p"
  type: "InnerProduct"
  bottom: "fc6_p"
  top: "fc7_p"
  # Shared weights: base learning rate, weight decay applied.
  param {
    name: "fc7_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Shared biases: 2x learning rate, no weight decay.
  param {
    name: "fc7_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "relu7_p"
  type: "ReLU"
  bottom: "fc7_p"
  top: "fc7_p"
}
layer {
  name: "drop7_p"
  type: "Dropout"
  bottom: "fc7_p"
  top: "fc7_p"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Paired-branch fc8, mirroring fc8/fc8_relu. Weights are shared with layer
# "fc8" through the identical param names. The output feeds the "merge_p"
# Concat layer.
layer {
  name: "fc8_p"
  type: "InnerProduct"
  bottom: "fc7_p"
  top: "fc8_p"
  # Shared weights: base learning rate, weight decay applied.
  param {
    name: "fc8_w"
    lr_mult: 1
    decay_mult: 1
  }
  # Shared biases: 2x learning rate, no weight decay.
  param {
    name: "fc8_b"
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1183
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place activation (bottom and top are the same blob).
layer {
  name: "fc8_p_relu"
  type: "ReLU"
  bottom: "fc8_p"
  top: "fc8_p"
}
############################################################ |
############################################################ |
# Joins the first branch's image feature with its "time" and "histograms"
# inputs. No concat_param is given, so Caffe's default axis (1) is used.
# NOTE(review): assumes "time" and "histograms" match "fc8" on every other
# axis (same batch size, no extra spatial dims) -- confirm against the HDF5 data.
layer {
  name: "merge"
  type: "Concat"
  bottom: "fc8"
  bottom: "time"
  bottom: "histograms"
  top: "merge"
}
# Joins the paired branch's image feature with its "time_p" and "histograms_p"
# inputs. No concat_param is given, so Caffe's default axis (1) is used.
# NOTE(review): assumes "time_p" and "histograms_p" match "fc8_p" on every
# other axis -- confirm against the HDF5 data.
layer {
  name: "merge_p"
  type: "Concat"
  bottom: "fc8_p"
  bottom: "time_p"
  bottom: "histograms_p"
  top: "merge_p"
}
# First-branch embedding: projects the merged feature to 200 dimensions. This
# output is the first input of the contrastive loss.
layer {
  name: "fc_final"
  type: "InnerProduct"
  bottom: "merge"
  top: "fc_final"
  param {
    name: "fc_final_w"
    lr_mult: 4  # Higher learning rate since this layer is starting from random
    decay_mult: 1
  }
  param {
    name: "fc_final_b"
    lr_mult: 4  # Higher learning rate since this layer is starting from random
    decay_mult: 0
  }
  inner_product_param {
    num_output: 200
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.01
    }
  }
}
# Paired-branch embedding: projects "merge_p" to 200 dimensions. Weights are
# shared with layer "fc_final" through the identical param names. This output
# is the second input of the contrastive loss.
layer {
  name: "fc_final_p"
  type: "InnerProduct"
  bottom: "merge_p"
  top: "fc_final_p"
  param {
    name: "fc_final_w"
    lr_mult: 4  # Higher learning rate since this layer is starting from random
    decay_mult: 1
  }
  param {
    name: "fc_final_b"
    lr_mult: 4  # Higher learning rate since this layer is starting from random
    decay_mult: 0
  }
  inner_product_param {
    num_output: 200
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.01
    }
  }
}
############################################################ |
############################################################ |
# Contrastive loss over the two 200-d embeddings, with the pair label taken
# from the HDF5 data layer and a margin of 1.0.
# NOTE(review): Caffe's ContrastiveLoss treats label 1 as "similar pair" and 0
# as "dissimilar pair" -- confirm the HDF5 labels follow that convention.
layer {
  name: "loss"
  type: "ContrastiveLoss"
  bottom: "fc_final"
  bottom: "fc_final_p"
  bottom: "label"
  top: "loss"
  contrastive_loss_param {
    margin: 1.0
  }
}
############################################################ |
# END |
############################################################ |
# Silence layers: consume the data-layer tops that no other layer uses
# ("shot_id", "shot_id_p", "label_image", "label_image_p") so they are not left
# dangling as net outputs.
layer {
  name: "shot_id_silence"
  type: "Silence"
  bottom: "shot_id"
}
layer {
  name: "shot_id_silence_p"
  type: "Silence"
  bottom: "shot_id_p"
}
layer {
  name: "label_image_silence"
  type: "Silence"
  bottom: "label_image"
}
layer {
  name: "label_image_silence_p"
  type: "Silence"
  bottom: "label_image_p"
}
# I am sorry to disturb you.
# I have downloaded your scene detection model from the 23rd ACM International Conference on Multimedia (2015).
# However, I can't find the files bbc_train_image.txt, bbc_train_image_p.txt, bbc_train_h5.txt, bbc_test_image.txt, bbc_test_image_p.txt and bbc_test_h5.txt.
# Where can I find or download them?