############################################################
# BEGIN
############################################################
# Network name reported by Caffe in logs and snapshots.
name: "ScenesSiamese"

############################################################
# DATA LAYERS
############################################################
# Train data layers
############################################################
# Train-phase image input for the first Siamese branch.
# Emits "image" (consumed by conv1) and "label_image" (discarded by the
# "label_image_silence" layer).
# NOTE(review): samples are matched with "data_image_p" and "data_multimodal"
# purely by position in the batch, so all three sources must list samples in
# the same order and `shuffle` must stay at its default (false) - confirm.
# NOTE(review): no new_height/new_width is set, so images are read at their
# native size; Caffe requires that size to match the mean_file dimensions -
# confirm the listed images are pre-resized.
layer {
  name: "data_image"
  type: "ImageData"
  top: "image"
  top: "label_image"
  include { phase: TRAIN }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image.txt"
    batch_size: 100
  }
  transform_param {
    mirror: true    # random horizontal flip (augmentation)
    crop_size: 227  # random 227x227 crop in TRAIN phase
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}

# Train-phase image input for the second ("_p", paired) Siamese branch.
# Emits "image_p" (consumed by conv1_p) and "label_image_p" (discarded by the
# "label_image_silence_p" layer).
# NOTE(review): must stay aligned sample-by-sample with "data_image" and
# "data_multimodal"; keep `shuffle` at its default (false) - confirm.
# NOTE(review): no new_height/new_width is set, so images are read at their
# native size; Caffe requires that size to match the mean_file dimensions -
# confirm the listed images are pre-resized.
layer {
  name: "data_image_p"
  type: "ImageData"
  top: "image_p"
  top: "label_image_p"
  include { phase: TRAIN }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image_p.txt"
    batch_size: 100
  }
  transform_param {
    mirror: true    # random horizontal flip (augmentation)
    crop_size: 227  # random 227x227 crop in TRAIN phase
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}

# Train-phase non-image inputs for both branches, read from HDF5.
# Caffe's HDF5Data layer loads one dataset per top, looked up by the top's
# name, so every file listed in `source` must contain datasets with exactly
# these seven names.
#   time / time_p             -> concatenated with fc8 / fc8_p ("merge", "merge_p")
#   histograms / histograms_p -> concatenated with fc8 / fc8_p
#   shot_id / shot_id_p       -> unused by the net (absorbed by Silence layers)
#   label                     -> pair label for the contrastive loss
# NOTE(review): rows must be in the same order as the two ImageData sources,
# and batch_size must match theirs (100) - confirm.
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  top: "time"
  top: "time_p"
  top: "shot_id"
  top: "shot_id_p"
  top: "histograms"
  top: "histograms_p"
  top: "label"
  include { phase: TRAIN }
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_h5.txt"
    batch_size: 100
  }
}

############################################################
# Test data layers
############################################################
# Test-phase image input for the first Siamese branch.
# Emits "image" (consumed by conv1) and "label_image" (discarded by the
# "label_image_silence" layer).
# NOTE(review): must stay aligned sample-by-sample with the test-phase
# "data_image_p" and "data_multimodal" layers; keep `shuffle` at its default
# (false) - confirm.
# NOTE(review): no new_height/new_width is set, so images are read at their
# native size; Caffe requires that size to match the mean_file dimensions -
# confirm the listed images are pre-resized.
layer {
  name: "data_image"
  type: "ImageData"
  top: "image"
  top: "label_image"
  include { phase: TEST }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image.txt"
    batch_size: 100
  }
  transform_param {
    # Mirroring is disabled for evaluation: Caffe flips at random whenever
    # `mirror` is true, in TEST as well as TRAIN, which would make test
    # results non-deterministic.
    mirror: false
    crop_size: 227  # centre crop in TEST phase
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}

# Test-phase image input for the second ("_p", paired) Siamese branch.
# Emits "image_p" (consumed by conv1_p) and "label_image_p" (discarded by the
# "label_image_silence_p" layer).
# NOTE(review): must stay aligned sample-by-sample with the test-phase
# "data_image" and "data_multimodal" layers; keep `shuffle` at its default
# (false) - confirm.
# NOTE(review): no new_height/new_width is set, so images are read at their
# native size; Caffe requires that size to match the mean_file dimensions -
# confirm the listed images are pre-resized.
layer {
  name: "data_image_p"
  type: "ImageData"
  top: "image_p"
  top: "label_image_p"
  include { phase: TEST }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image_p.txt"
    batch_size: 100
  }
  transform_param {
    # Mirroring is disabled for evaluation: Caffe flips at random whenever
    # `mirror` is true, in TEST as well as TRAIN, which would make test
    # results non-deterministic.
    mirror: false
    crop_size: 227  # centre crop in TEST phase
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}

# Test-phase non-image inputs for both branches, read from HDF5.
# Caffe's HDF5Data layer loads one dataset per top, looked up by the top's
# name, so every file listed in `source` must contain datasets with exactly
# these seven names.
#   time / time_p             -> concatenated with fc8 / fc8_p ("merge", "merge_p")
#   histograms / histograms_p -> concatenated with fc8 / fc8_p
#   shot_id / shot_id_p       -> unused by the net (absorbed by Silence layers)
#   label                     -> pair label for the contrastive loss
# NOTE(review): rows must be in the same order as the two test ImageData
# sources, and batch_size must match theirs (100) - confirm.
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  top: "time"
  top: "time_p"
  top: "shot_id"
  top: "shot_id_p"
  top: "histograms"
  top: "histograms_p"
  top: "label"
  include { phase: TEST }
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_h5.txt"
    batch_size: 100
  }
}

############################################################
# FIRST CONVOLUTIONAL NEURAL NETWORK
############################################################
# Branch 1, stage 1: conv1 -> ReLU -> max-pool -> LRN.
# The param names "conv1_w"/"conv1_b" are reused by "conv1_p"; Caffe shares
# parameters that have the same name, which ties the two Siamese branches.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "image"
  top: "conv1"
  param {
    name: "conv1_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2     # biases learn at twice the base rate
    decay_mult: 0  # no weight decay on biases
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv1").
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm1"
  type: "LRN"
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Branch 1, stage 2: conv2 -> ReLU -> max-pool -> LRN.
# Param names "conv2_w"/"conv2_b" are shared with "conv2_p".
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "norm1"
  top: "conv2"
  param {
    name: "conv2_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2  # grouped convolution: channels are split into two groups
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv2").
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Branch 1, stage 3: conv3 -> ReLU (no pooling, no grouping).
# Param names "conv3_w"/"conv3_b" are shared with "conv3_p".
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "norm2"
  top: "conv3"
  param {
    name: "conv3_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv3_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv3").
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
# Branch 1, stage 4: conv4 -> ReLU.
# Param names "conv4_w"/"conv4_b" are shared with "conv4_p".
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    name: "conv4_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv4_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2  # grouped convolution: channels are split into two groups
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv4").
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
# Branch 1, stage 5: conv5 -> ReLU -> max-pool.
# Param names "conv5_w"/"conv5_b" are shared with "conv5_p".
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param {
    name: "conv5_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv5_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2  # grouped convolution: channels are split into two groups
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv5").
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Branch 1, fully connected stage 6: fc6 -> ReLU -> Dropout (all on blob "fc6").
# Param names "fc6_w"/"fc6_b" are shared with "fc6_p".
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param {
    name: "fc6_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc6_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU on "fc6".
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
# In-place dropout on "fc6"; this branch's mask is drawn independently of the
# one used by "drop6_p".
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Branch 1, fully connected stage 7: fc7 -> ReLU -> Dropout (all on blob "fc7").
# Param names "fc7_w"/"fc7_b" are shared with "fc7_p".
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param {
    name: "fc7_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc7_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU on "fc7".
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
# In-place dropout on "fc7"; this branch's mask is drawn independently of the
# one used by "drop7_p".
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Branch 1, fully connected stage 8: fc8 -> ReLU.
# The rectified 1183-d output is the visual feature concatenated with "time"
# and "histograms" in the "merge" layer.
# Param names "fc8_w"/"fc8_b" are shared with "fc8_p".
# NOTE(review): 1183 presumably matches the output size of the pretrained
# classifier these weights are initialised from - confirm before changing it.
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  param {
    name: "fc8_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc8_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  inner_product_param {
    num_output: 1183
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU on "fc8".
layer {
  name: "fc8_relu"
  type: "ReLU"
  bottom: "fc8"
  top: "fc8"
}

############################################################
# SECOND CONVOLUTIONAL NEURAL NETWORK
############################################################
# Branch 2, stage 1: conv1_p -> ReLU -> max-pool -> LRN.
# Mirrors "conv1"; the param names "conv1_w"/"conv1_b" are the same as in
# branch 1, so Caffe shares the weights. lr_mult/decay_mult are kept identical
# to branch 1 so the two declarations of each shared param agree.
layer {
  name: "conv1_p"
  type: "Convolution"
  bottom: "image_p"
  top: "conv1_p"
  param {
    name: "conv1_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv1_p").
layer {
  name: "relu1_p"
  type: "ReLU"
  bottom: "conv1_p"
  top: "conv1_p"
}
layer {
  name: "pool1_p"
  type: "Pooling"
  bottom: "conv1_p"
  top: "pool1_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm1_p"
  type: "LRN"
  bottom: "pool1_p"
  top: "norm1_p"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Branch 2, stage 2: conv2_p -> ReLU -> max-pool -> LRN.
# Mirrors "conv2" and shares its weights through the param names
# "conv2_w"/"conv2_b".
layer {
  name: "conv2_p"
  type: "Convolution"
  bottom: "norm1_p"
  top: "conv2_p"
  param {
    name: "conv2_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2  # grouped convolution: channels are split into two groups
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv2_p").
layer {
  name: "relu2_p"
  type: "ReLU"
  bottom: "conv2_p"
  top: "conv2_p"
}
layer {
  name: "pool2_p"
  type: "Pooling"
  bottom: "conv2_p"
  top: "pool2_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm2_p"
  type: "LRN"
  bottom: "pool2_p"
  top: "norm2_p"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Branch 2, stage 3: conv3_p -> ReLU.
# Mirrors "conv3" and shares its weights through the param names
# "conv3_w"/"conv3_b".
layer {
  name: "conv3_p"
  type: "Convolution"
  bottom: "norm2_p"
  top: "conv3_p"
  param {
    name: "conv3_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv3_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv3_p").
layer {
  name: "relu3_p"
  type: "ReLU"
  bottom: "conv3_p"
  top: "conv3_p"
}
# Branch 2, stage 4: conv4_p -> ReLU.
# Mirrors "conv4" and shares its weights through the param names
# "conv4_w"/"conv4_b".
layer {
  name: "conv4_p"
  type: "Convolution"
  bottom: "conv3_p"
  top: "conv4_p"
  param {
    name: "conv4_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv4_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2  # grouped convolution: channels are split into two groups
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv4_p").
layer {
  name: "relu4_p"
  type: "ReLU"
  bottom: "conv4_p"
  top: "conv4_p"
}
# Branch 2, stage 5: conv5_p -> ReLU -> max-pool.
# Mirrors "conv5" and shares its weights through the param names
# "conv5_w"/"conv5_b".
layer {
  name: "conv5_p"
  type: "Convolution"
  bottom: "conv4_p"
  top: "conv5_p"
  param {
    name: "conv5_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "conv5_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2  # grouped convolution: channels are split into two groups
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv5_p").
layer {
  name: "relu5_p"
  type: "ReLU"
  bottom: "conv5_p"
  top: "conv5_p"
}
layer {
  name: "pool5_p"
  type: "Pooling"
  bottom: "conv5_p"
  top: "pool5_p"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Branch 2, fully connected stage 6: fc6_p -> ReLU -> Dropout (all on "fc6_p").
# Mirrors "fc6" and shares its weights through the param names "fc6_w"/"fc6_b".
layer {
  name: "fc6_p"
  type: "InnerProduct"
  bottom: "pool5_p"
  top: "fc6_p"
  param {
    name: "fc6_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc6_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU on "fc6_p".
layer {
  name: "relu6_p"
  type: "ReLU"
  bottom: "fc6_p"
  top: "fc6_p"
}
# In-place dropout on "fc6_p"; this branch's mask is drawn independently of
# the one used by "drop6".
layer {
  name: "drop6_p"
  type: "Dropout"
  bottom: "fc6_p"
  top: "fc6_p"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Branch 2, fully connected stage 7: fc7_p -> ReLU -> Dropout (all on "fc7_p").
# Mirrors "fc7" and shares its weights through the param names "fc7_w"/"fc7_b".
layer {
  name: "fc7_p"
  type: "InnerProduct"
  bottom: "fc6_p"
  top: "fc7_p"
  param {
    name: "fc7_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc7_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# In-place ReLU on "fc7_p".
layer {
  name: "relu7_p"
  type: "ReLU"
  bottom: "fc7_p"
  top: "fc7_p"
}
# In-place dropout on "fc7_p"; this branch's mask is drawn independently of
# the one used by "drop7".
layer {
  name: "drop7_p"
  type: "Dropout"
  bottom: "fc7_p"
  top: "fc7_p"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Branch 2, fully connected stage 8: fc8_p -> ReLU.
# The rectified 1183-d output is the visual feature concatenated with "time_p"
# and "histograms_p" in the "merge_p" layer.
# Mirrors "fc8" and shares its weights through the param names "fc8_w"/"fc8_b".
layer {
  name: "fc8_p"
  type: "InnerProduct"
  bottom: "fc7_p"
  top: "fc8_p"
  param {
    name: "fc8_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc8_b"
    lr_mult: 2
    decay_mult: 0  # no weight decay on biases
  }
  inner_product_param {
    num_output: 1183  # must equal fc8's num_output because the weights are shared
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU on "fc8_p".
layer {
  name: "fc8_p_relu"
  type: "ReLU"
  bottom: "fc8_p"
  top: "fc8_p"
}

############################################################
# MERGE + LAST IP LAYERS
############################################################
# Branch 1 feature fusion: joins the visual feature ("fc8") with the "time"
# and "histograms" inputs loaded from HDF5.
# No concat_param is given, so Caffe concatenates along its default axis (1);
# all bottoms must therefore have the same number of axes and agree in every
# other dimension.
# NOTE(review): assumes "time" and "histograms" are stored as 2-D (N, C)
# datasets so they line up with the (N, 1183) "fc8" blob - confirm.
layer {
  name: "merge"
  type: "Concat"
  bottom: "fc8"
  bottom: "time"
  bottom: "histograms"
  top: "merge"
}

# Branch 2 feature fusion: joins the visual feature ("fc8_p") with the
# "time_p" and "histograms_p" inputs loaded from HDF5. The bottom order matches
# "merge" so the shared "fc_final" weights see the same feature layout.
# No concat_param is given, so Caffe concatenates along its default axis (1).
# NOTE(review): assumes "time_p" and "histograms_p" are stored as 2-D (N, C)
# datasets so they line up with the (N, 1183) "fc8_p" blob - confirm.
layer {
  name: "merge_p"
  type: "Concat"
  bottom: "fc8_p"
  bottom: "time_p"
  bottom: "histograms_p"
  top: "merge_p"
}

# Branch 1 embedding: projects the fused feature ("merge") to the 200-d vector
# compared by the contrastive loss. No non-linearity follows this layer.
# Param names "fc_final_w"/"fc_final_b" are shared with "fc_final_p".
layer {
  name: "fc_final"
  type: "InnerProduct"
  bottom: "merge"
  top: "fc_final"
  param {
    name: "fc_final_w"
    lr_mult: 4  # Higher learning rate since this layer is starting from random
    decay_mult: 1
  }
  param {
    name: "fc_final_b"
    lr_mult: 4  # Higher learning rate since this layer is starting from random
    decay_mult: 0  # no weight decay on biases
  }
  inner_product_param {
    num_output: 200
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.01
    }
  }
}

# Branch 2 embedding: projects the fused feature ("merge_p") to the 200-d
# vector compared by the contrastive loss.
# Mirrors "fc_final" and shares its weights through the param names
# "fc_final_w"/"fc_final_b"; lr_mult/decay_mult are kept identical to branch 1.
layer {
  name: "fc_final_p"
  type: "InnerProduct"
  bottom: "merge_p"
  top: "fc_final_p"
  param {
    name: "fc_final_w"
    lr_mult: 4  # Higher learning rate since this layer is starting from random
    decay_mult: 1
  }
  param {
    name: "fc_final_b"
    lr_mult: 4  # Higher learning rate since this layer is starting from random
    decay_mult: 0  # no weight decay on biases
  }
  inner_product_param {
    num_output: 200
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.01
    }
  }
}

############################################################
# LOSS/ACCURACY LAYERS
############################################################
# Contrastive loss over the two 200-d embeddings.
# Bottoms, in the order Caffe expects: embedding A, embedding B, pair label.
# In Caffe's ContrastiveLoss a label of 1 marks a similar pair (pulled
# together) and 0 a dissimilar pair (pushed apart up to `margin`).
# NOTE(review): confirm the "label" dataset in the HDF5 files uses that
# convention.
layer {
  name: "loss"
  type: "ContrastiveLoss"
  bottom: "fc_final"
  bottom: "fc_final_p"
  bottom: "label"
  top: "loss"
  contrastive_loss_param {
    margin: 1.0
  }
}

############################################################
# END OF MAIN NETWORK
# (the Silence layers below only absorb otherwise-unused data tops)
############################################################

# Absorbs the "shot_id" top of "data_multimodal", which no other layer
# consumes, so Caffe does not treat it as a net output.
layer {
  name: "shot_id_silence"
  type: "Silence"
  bottom: "shot_id"
}

# Absorbs the "shot_id_p" top of "data_multimodal", which no other layer
# consumes, so Caffe does not treat it as a net output.
layer {
  name: "shot_id_silence_p"
  type: "Silence"
  bottom: "shot_id_p"
}

# Absorbs the "label_image" top of "data_image". ImageData always emits a
# label, but the pair label used for training comes from the HDF5 "label" top.
layer {
  name: "label_image_silence"
  type: "Silence"
  bottom: "label_image"
}

# Absorbs the "label_image_p" top of "data_image_p". ImageData always emits a
# label, but the pair label used for training comes from the HDF5 "label" top.
layer {
  name: "label_image_silence_p"
  type: "Silence"
  bottom: "label_image_p"
}
Sorry to disturb you.
I have downloaded your scene detection model from the 23rd ACM International Conference on Multimedia (2015).
However, I can't find the files bbc_train_image.txt, bbc_train_image_p.txt, bbc_train_h5.txt, bbc_test_image.txt, bbc_test_image_p.txt, and bbc_test_h5.txt.
Where can I find or download them?