Hyperparameter optimization scripts
function submitjobs(nosubmit, numjobs, main) | |
try | |
xlwrite_path='../data_utils/xlwrite/'; | |
addpath(xlwrite_path); | |
javaaddpath([xlwrite_path 'poi_library/poi-3.8-20120326.jar']); | |
javaaddpath([xlwrite_path 'poi_library/poi-ooxml-3.8-20120326.jar']); | |
javaaddpath([xlwrite_path 'poi_library/poi-ooxml-schemas-3.8-20120326.jar']); | |
javaaddpath([xlwrite_path 'poi_library/xmlbeans-2.3.0.jar']); | |
javaaddpath([xlwrite_path 'poi_library/dom4j-1.6.1.jar']); | |
javaaddpath([xlwrite_path 'poi_library/stax-api-1.0.1.jar']); | |
if nargin<1, nosubmit=true; end
if nargin<2, numjobs=1; end | |
if nargin<3, main='./lookahead_attention.lua'; end | |
% --- establishing system setup | |
system('hostname > tmp.txt'); | |
if ~isempty(regexp(fileread('tmp.txt'), 'vision', 'once')) || ~isempty(regexp(fileread('tmp.txt'), 'adriana', 'once')) || ~isempty(regexp(fileread('tmp.txt'), 'eldar', 'once')) | |
hpcsystem='condor'; | |
else | |
hpcsystem='slurm'; | |
end | |
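% (Hostnames containing vision/adriana/eldar are boxes on the local Condor
% pool; anything else, e.g. the TACC maverick/stampede nodes matched next,
% is assumed to run Slurm.)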
if ~isempty(regexp(fileread('tmp.txt'), 'maverick', 'once')) | |
clustername='maverick'; | |
elseif ~isempty(regexp(fileread('tmp.txt'), 'stampede', 'once')) | |
clustername='stampede'; | |
else | |
clustername='condor'; | |
end | |
if strcmp(hpcsystem, 'condor') | |
% get clusterno through condor_q or condor_history commands | |
clusterno=-1; | |
system('condor_q -format "%d\n" ClusterId > tmp.txt 2> /dev/null'); | |
allclust=load('tmp.txt'); | |
if ~isempty(allclust) | |
clusterno=max(max(allclust), clusterno); | |
end | |
system('condor_history -format "%d\n" ClusterId 2> /dev/null | head -50 > tmp.txt'); | |
allclust=load('tmp.txt'); | |
if ~isempty(allclust) | |
clusterno=max(max(allclust), clusterno); | |
end | |
else % do the next best thing - assign another unique ID | |
origclusterno=load('.lastclusterno'); | |
clusterno=origclusterno; | |
end | |
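% NOTE: Condor assigns ClusterIds itself, so the next free ID is inferred
% from condor_q/condor_history above; Slurm has no equivalent, so a private
% counter is kept in the hidden file .lastclusterno and advanced after each
% sbatch below.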
% --- hpcjob template construction | |
hpcjob_template.main=main; | |
hpcjob_template.standard_args=' --cuda --no_debug --no_loadInit --full_data'; | |
%hpcjob_template.standard_args=' --cuda --no_debug --no_loadInit'; | |
hpcjob_template.useGPU=true; | |
% --- declaring hyperparameter ranges | |
hypno=0; | |
autoreport_flag=false; | |
autoreport_oldjobs=0; | |
%hypno=hypno+1; hyp(hypno)=HyperParam('learningRate', 'log_uniform', [-2.3,-1.7]); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('learningRate', 'log_uniform', [-2 -1]); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('learningRate', 'log_uniform', [-3,0]); | |
hypno=hypno+1; hyp(hypno)=HyperParam('initModel', 'fix', '/home/01932/dineshj/save/2002450.dat'); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('initModel', 'fix', '/home/01932/dineshj/save/2002365.dat'); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('initModel', 'fix', '/home/01932/dineshj/save/2002329.dat'); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('learningRate', 'log_uniform', [-4,-2.5]); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('learningRate', 'sequence', logspace(-2,-1,numjobs)); | |
hypno=hypno+1; hyp(hypno)=HyperParam('learningRate', 'fix', 0.00316); | |
hypno=hypno+1; hyp(hypno)=HyperParam('finetune_lrMult', 'fix', -1); | |
hypno=hypno+1; hyp(hypno)=HyperParam('finetuneTopFlag', 'fix', +1); | |
hypno=hypno+1; hyp(hypno)=HyperParam('actOnTime', 'fix', +1); | |
hypno=hypno+1; hyp(hypno)=HyperParam('actOnElev', 'fix', +1); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('learningRate', 'fix', 0.04); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('learningRate', 'fix', 0.0224); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('learningRate', 'fix', 0.01); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('lookahead_loss_wt', 'log_uniform', [1.5,2.5]); | |
hypno=hypno+1; hyp(hypno)=HyperParam('weightDecay', 'fix', 0.005); | |
hypno=hypno+1; hyp(hypno)=HyperParam('featDropout', 'fix', 2e-5); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('featDropout', 'fix', 0); | |
hypno=hypno+1; hyp(hypno)=HyperParam('combineDropout', 'fix', 0.5); | |
hypno=hypno+1; hyp(hypno)=HyperParam('batchNormFlag', 'fix', +1); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('combineDropout', 'fix', 0); | |
hypno=hypno+1; hyp(hypno)=HyperParam('lookahead_loss_wt', 'fix', 100); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('equiv_reg_wt', 'sequence', logspace(-4,-0,numjobs)); | |
hypno=hypno+1; hyp(hypno)=HyperParam('equiv_reg_wt', 'fix', 42); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('equiv_reg_wt', 'fix', 0); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('num_canonical_acts', 'pmf', [1:9; 1./(1:9).^2]); | |
hypno=hypno+1; hyp(hypno)=HyperParam('lookAheadActorFlag', 'fix', -1); | |
tmp=getHyperParam(hyp, 'lookAheadActorFlag'); | |
if ~(strcmp(tmp.dist, 'fix') && tmp.params<=0) | |
hypno=hypno+1; hyp(hypno)=HyperParam('num_canonical_acts', 'fix', 0); % relevant only if lookAheadActor | |
hypno=hypno+1; hyp(hypno)=HyperParam('lookAheadClassifyFlag', 'fix', -1); % relevant only if lookAheadActor | |
else | |
hypno=hypno+1; hyp(hypno)=HyperParam('num_canonical_acts', 'fix', 0); | |
hypno=hypno+1; hyp(hypno)=HyperParam('lookAheadClassifyFlag', 'fix', -1); % relevant only if lookAheadActor | |
end | |
hypno=hypno+1; hyp(hypno)=HyperParam('rho', 'fix', 3); | |
hypno=hypno+1; hyp(hypno)=HyperParam('greedyLossFlag', 'fix', +1); | |
hypno=hypno+1; hyp(hypno)=HyperParam('shareClassifiersFlag', 'fix', -1); | |
hypno=hypno+1; hyp(hypno)=HyperParam('identFeedbackFlag', 'fix', -1); | |
hypno=hypno+1; hyp(hypno)=HyperParam('initFeedbackFlag', 'fix', +1); | |
hypno=hypno+1; hyp(hypno)=HyperParam('relativeActionFlag', 'fix', +1); | |
hypno=hypno+1; hyp(hypno)=HyperParam('simplePatchSensorFlag', 'fix', +1); | |
hypno=hypno+1; hyp(hypno)=HyperParam('maxTries', 'fix', 50); | |
hypno=hypno+1; hyp(hypno)=HyperParam('pretrainModeEpochs', 'fix', 0); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('pretrainModeEpochs', 'sequence', [0, 10, 0, 10]); | |
hypno=hypno+1; hyp(hypno)=HyperParam('lookahead_bottleneck', 'fix', 100); | |
hypno=hypno+1; hyp(hypno)=HyperParam('lookahead_distance', 'fix', 'cosine'); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('maxEpoch', 'fix', 2000); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('manual_seed', 'sequence', [79836, 94301, 68372, 13209, 72273, 11036, 11750, 64072, 32882, 65382]); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('manual_seed', 'sequence', [ceil(rand(numjobs,1)*1e5)]); | |
hypno=hypno+1; hyp(hypno)=HyperParam('manual_seed', 'sequence', [1000 2000 6000 4000 5000]); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('manual_seed', 'fix', 38828); | |
%hypno=hypno+1; hyp(hypno)=HyperParam('manual_seed', 'fix', 6000); | |
hypno=hypno+1; hyp(hypno)=HyperParam('report_res_iter', 'fix', 20); | |
hypno=hypno+1; hyp(hypno)=HyperParam('randomActionsFlag', 'fix', 0); % to implement random actions baseline | |
%hypno=hypno+1; hyp(hypno)=HyperParam('randomActionsFlag', 'sequence', 0); % to implement random actions baseline | |
time=200; % Slurm wall time in minutes (written into #SBATCH -t below)
xlscells={}; | |
xlscells{1,1}='jobno'; | |
xlscells{1,2}='commit'; | |
[~,curr_commit]=system('git log --format="%h" -n 1'); | |
curr_commit=curr_commit(9:15); | |
for j=1:numel(hyp) | |
xlscells{1,j+2}=hyp(j).name; | |
end | |
for i=1:numjobs | |
fprintf(['\n' repmat('-',1,20) '\n']); | |
clusterno = clusterno+1; | |
if i==1 | |
start_clusterno=clusterno; | |
stop_clusterno=clusterno+numjobs-1; | |
end | |
tmp=fieldnames(hpcjob_template); | |
for fieldno=1:numel(tmp) | |
hpcjob(i).(tmp{fieldno})=hpcjob_template.(tmp{fieldno}); | |
end | |
hpcjob(i).clusterno=clusterno; | |
xlscells{i+1,1}=clusterno; | |
xlscells{i+1,2}=curr_commit; | |
% ----- sampling random hyperparameters | |
for j=1:numel(hyp) | |
switch hyp(j).dist | |
case 'uniform' | |
% params: [lb, ub] | |
val=rand(1)*(hyp(j).params(2)-hyp(j).params(1))+hyp(j).params(1); | |
case 'log_uniform' | |
% params: [lb, ub] in log space | |
val=10^(rand(1)*(hyp(j).params(2)-hyp(j).params(1))+hyp(j).params(1)); | |
case 'disc_uniform' | |
% params: [values] | |
val=randsample(hyp(j).params, 1); | |
case 'pmf' | |
% params: [values; weights] | |
pmf=hyp(j).params(2,:); | |
pmf=pmf/sum(pmf); | |
val=hyp(j).params(1, mnrnd(1,pmf)==1); % mnrnd(1,pmf) is a one-hot count vector; convert to a logical index
%cmf=cumsum(pmf); | |
%val=hyp(j).params(find(rand(1)<cmf, 1)); | |
case 'gaussian' | |
% params: [mean, std] | |
val=randn(1)*hyp(j).params(2)+hyp(j).params(1); | |
case 'sequence' | |
% deterministic progression | |
val=hyp(j).params(i); | |
case 'fix' | |
% fixed to single value | |
val=hyp(j).params; | |
otherwise | |
error('unknown distribution %s', hyp(j).dist); | |
end | |
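% Worked example of the samplers above: 'log_uniform' with params [-3,0]
% draws 10^U(-3,0), i.e. a value in [1e-3, 1) spread evenly on a log scale;
% 'pmf' with params [1:9; 1./(1:9).^2] picks value k with probability
% proportional to 1/k^2 after normalization.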
if isfield(hpcjob(i), 'args') | |
hpcjob(i).args(end+1)=struct('name', hyp(j).name, 'val', val); | |
else | |
hpcjob(i).args(1)=struct('name', hyp(j).name, 'val', val); | |
end | |
xlscells{i+1,j+2}=val; | |
end | |
if strcmp(hpcsystem,'slurm') | |
hpcjob(i).args(end+1)=struct('name', 'sys_cmd', 'val', sprintf('"rsync -avrz -e ssh ../clust_runs/*%d* [email protected]:/scratch/vision/dineshj/active/clust_runs/; rsync -avrz -e ssh ../clust_runs/*%d* [email protected]:/home/dineshj/Documents/clust_runs/;"', clusterno, clusterno)); | |
hpcjob(i).args(end+1)=struct('name', 'sys_cmd_iter', 'val', 10); | |
if autoreport_flag && i==numjobs | |
hpcjob(i).args(end+1)= struct('name', 'sys_cmd2', 'val', sprintf('th collate_results.lua --start %d --stop %d;"', start_clusterno-autoreport_oldjobs, stop_clusterno)); | |
end | |
end | |
%-- setting jobno and logger file | |
hpcjob(i).args(end+1)=struct('name', 'loggerfile', 'val', sprintf('../clust_runs/%d.rec', clusterno)); | |
hpcjob(i).args(end+1)=struct('name', 'jobno', 'val', clusterno); | |
fprintf('Sampled args(%d):\n', clusterno); | |
fprintf('\t---\n') | |
arg_disp(hpcjob(i)); | |
fprintf('\t---\n') | |
delete('hyperparams.xls'); | |
xlwrite('hyperparams.xls', xlscells); | |
%-- creating submit file and submitting | |
switch hpcsystem | |
case 'condor' | |
condor_submitFile=sprintf('../clust_runs/%d.submit.%s', clusterno, hpcsystem); | |
fprintf('Submit file %s\n', condor_submitFile); | |
struct2condorsubmit(hpcjob(i), condor_submitFile); | |
if nosubmit, keyboard; end | |
status=system(sprintf('condor_submit %s 2> /dev/null', condor_submitFile)); | |
assert(status==0); | |
case 'slurm' | |
slurm_submitFile=sprintf('../clust_runs/%d.submit.%s', clusterno, hpcsystem); | |
fprintf('Submit file %s\n', slurm_submitFile); | |
if ~exist('time', 'var') | |
time=20; | |
end | |
struct2slurmsubmit(hpcjob(i), slurm_submitFile, clusterno, clustername, time); | |
if nosubmit, keyboard; end | |
status=system(sprintf('sbatch %s', slurm_submitFile)); | |
assert(status==0); | |
% updating lastclusterno file | |
origclusterno=origclusterno+1; | |
f=fopen('.lastclusterno', 'w'); | |
fprintf(f, '%d', origclusterno); | |
fclose(f); | |
otherwise | |
error('Unknown hpc system'); | |
end | |
end | |
catch err | |
getReport(err) | |
keyboard | |
end | |
end | |
function struct2slurmsubmit(jobstruct, filename, clusterno, cluster, num_mins) | |
FILE=fopen(filename,'w'); | |
fprintf(FILE, '#!/bin/bash\n'); | |
fprintf(FILE, '#SBATCH -J stark\n'); | |
fprintf(FILE, '#SBATCH -o ../clust_runs/%d.out\n', clusterno); | |
fprintf(FILE, '#SBATCH -n 1\n'); | |
if jobstruct.useGPU | |
switch cluster | |
case 'stampede' | |
fprintf(FILE, '#SBATCH -A Visual-Recognition\n'); | |
%fprintf(FILE, '#SBATCH -A Fine-Tuning-CNNs\n'); | |
fprintf(FILE, '#SBATCH -p gpu\n'); | |
%fprintf(FILE, '#SBATCH -p vis\n'); | |
case 'maverick' | |
fprintf(FILE, '#SBATCH -A Visual-Recognition\n'); | |
%fprintf(FILE, '#SBATCH -A Fine-Tuning-CNNs\n'); | |
fprintf(FILE, '#SBATCH -p gpu\n'); | |
end | |
else | |
fprintf(FILE, '#SBATCH -p normal\n'); | |
end | |
fprintf(FILE, '#SBATCH -t 00:%02d:00\n', num_mins); | |
switch cluster | |
case 'stampede' | |
fprintf(FILE, 'th %s %s', jobstruct.main, jobstruct.standard_args); | |
case 'maverick' | |
fprintf(FILE, 'th %s %s', jobstruct.main, jobstruct.standard_args); | |
otherwise | |
error('Unknown cluster name'); | |
end | |
arg_string=''; | |
for argno=1:numel(jobstruct.args) | |
arg_string=[arg_string, ' --', jobstruct.args(argno).name, ' ', arg2str(jobstruct.args(argno).val)]; | |
end | |
fprintf(FILE, arg_string); | |
fclose(FILE); | |
end | |
function struct2condorsubmit(jobstruct, filename) | |
FILE=fopen(filename,'w'); | |
fprintf(FILE, '+Group="GRAD"\n'); | |
fprintf(FILE, '+Project="AI_ROBOTICS"\n'); | |
fprintf(FILE, '+ProjectDescription=""\n'); | |
if jobstruct.useGPU | |
fprintf(FILE, '+GPUJob=true\n'); | |
fprintf(FILE, 'Requirements=TARGET.GPUSlot\n'); | |
end | |
fprintf(FILE, 'Universe = vanilla\n'); | |
fprintf(FILE, 'Getenv = True\n'); | |
fprintf(FILE, 'Log = ../clust_runs/%d.log\n', jobstruct.clusterno); | |
fprintf(FILE, 'Output = ../clust_runs/%d.out\n', jobstruct.clusterno); | |
fprintf(FILE, 'Error = ../clust_runs/%d.err\n', jobstruct.clusterno); | |
fprintf(FILE, 'Executable=/vision/vision_users/dineshj/torch_cuda/bin/th \n'); | |
fprintf(FILE, 'Arguments= %s %s', jobstruct.main, jobstruct.standard_args); | |
arg_string=''; | |
for argno=1:numel(jobstruct.args) | |
arg_string=[arg_string, ' --', jobstruct.args(argno).name, ' ', arg2str(jobstruct.args(argno).val)]; | |
end | |
fprintf(FILE, arg_string); | |
fprintf(FILE, '\nQueue %d', 1); | |
fclose(FILE); | |
end | |
function str=arg2str(arg) | |
if ischar(arg) | |
str=arg; | |
return | |
end | |
if isnumeric(arg) | |
str=num2str(arg); | |
return; | |
end | |
end | |
function HyperParamObject = HyperParam(name, dist, params) | |
HyperParamObject = struct('name', name, 'dist', dist, 'params', params); | |
end | |
function param = getHyperParam(hyp_array, param_name) | |
hyp_names=arrayfun(@(x) x.name, hyp_array, 'UniformOutput', false); | |
tmp=find(strcmp(hyp_names, param_name)); | |
assert(~isempty(tmp), sprintf('No parameter %s\n', param_name)); | |
assert(numel(tmp)<2, sprintf('More than one parameter %s\n', param_name)); | |
param=hyp_array(tmp); return; | |
end | |
function arg_disp(jobstruct) | |
if ~isfield(jobstruct, 'args') | |
return | |
end | |
for i=1:numel(jobstruct.args) | |
fprintf('%s\t= %s\n', jobstruct.args(i).name, arg2str(jobstruct.args(i).val)); | |
end | |
end |
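The switch statement above is the core of the random search, so for reference here is a minimal standalone MATLAB sketch of the same sampling idea (hypothetical parameter range, no job submission):

% draw a few candidate learning rates the way the 'log_uniform' branch does
hyp = struct('name', 'learningRate', 'dist', 'log_uniform', 'params', [-3 0]);
for trial = 1:5
    lo = hyp.params(1); up = hyp.params(2);
    val = 10^(rand(1)*(up-lo) + lo);              % uniform in log10 space
    fprintf('--%s %s\n', hyp.name, num2str(val)); % e.g. --learningRate 0.0041728
end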
import sys | |
import csv | |
sys.path.insert(0, "../kitti_codes/") | |
import socket | |
import time | |
import re | |
import argparse | |
import numpy as np | |
from subprocess import call | |
from IPython.core.debugger import Tracer; debug_here = Tracer() | |
machine_name=socket.gethostname(); | |
if (re.search('vision',machine_name) or | |
re.search('eldar', machine_name) or | |
re.search('adriana', machine_name) or | |
re.search('jaechul', machine_name)): | |
hpcsystem='condor'; | |
#error('Not implemented'); | |
sys.path.insert(0, "/scratch/vision/dineshj/caffe2/distribute_CPU/python") | |
else: | |
hpcsystem='slurm'; | |
sys.path.insert(0, "/work/01932/dineshj/caffe2/python") | |
import caffe | |
from caffe import layers as L | |
from caffe import params as P | |
import layer_stack as LS | |
if re.search('maverick', machine_name): | |
clustername='maverick'; | |
elif re.search('stampede', machine_name): | |
clustername='stampede'; | |
else: | |
clustername='condor'; | |
#hpcsystem='slurm' | |
#clustername='maverick' | |
#read clusterno | |
lastclusterno=int(open('.lastclusterno').read()); | |
print("Last clusterno: %d" % lastclusterno); | |
clusterno=lastclusterno; | |
resume_flag=False | |
snapshot='' | |
finetune_flag=False | |
weights='' | |
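# resume_flag/snapshot and finetune_flag/weights control whether the submit
# files written below pass -snapshot (resume from a .solverstate) or
# -weights (initialize from a .caffemodel) to the caffe binary; both are
# off by default.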
def main(): | |
global clusterno | |
#config='drlim'; | |
base_solver=LS.CaffeSolver(debug=args.debug) | |
if args.config == 'cls': | |
num_cls=397; | |
cls_batchsize=64; | |
cls_loss_weights=10*np.ones(args.numjobs); | |
base_solver.sp["max_iter"]=str(30000); | |
base_solver.sp["snapshot"]=str(10000); | |
base_solver.sp["stepsize"]=str(5000); base_solver.sp["gamma"]=str(0.5); | |
base_solver.sp["weight_decay"]=str(0.0005); | |
runtime=720; | |
learning_rates=np.logspace(-4,-4,args.numjobs); | |
elif args.config == 'drlim' or args.config == "ved_drlim": | |
num_cls=397; | |
nonDiscrete_flag=False | |
dynamicCrop_flag=False | |
pair_batchsize=64; cls_batchsize=64; | |
#drlim_loss_weights=np.ones(args.numjobs); | |
drlim_loss_weights=np.zeros(args.numjobs); | |
#trans_loss_weights=1*np.ones(args.numjobs)*(1 if args.config=='ved_drlim' else 0); | |
#trans_loss_weights=np.logspace(-2,0,args.numjobs)*(1 if args.config=='ved_drlim' else 0); | |
#trans_loss_weights=np.linspace(0.1,0.9,args.numjobs)*(1 if args.config=='ved_drlim' else 0); | |
trans_loss_weights=np.ones(args.numjobs)*(1 if args.config=='ved_drlim' else 0); | |
drlim_loss_margins=100*np.ones(args.numjobs); | |
trans_loss_margins=100*np.ones(args.numjobs); | |
cls_loss_weights=10*np.ones(args.numjobs); | |
base_solver.sp["max_iter"]=str(70000); | |
base_solver.sp["snapshot"]=str(10000); | |
base_solver.sp["stepsize"]=str(5000); base_solver.sp["gamma"]=str(0.5); | |
base_solver.sp["weight_decay"]=str(0.005); | |
runtime=720; | |
learning_rates=np.logspace(-4.5,-2.5,args.numjobs); | |
csvfile = open('hyperparams.csv', 'w'); | |
fieldnames=[ | |
"jobno" , | |
"config" , | |
"base_lr" , | |
"nonDiscrete_flag" , | |
"dynamicCrop_flag" , | |
"pair_batchsize" , | |
"cls_batchsize" , | |
"cls_loss_weight" , | |
"drlim_loss_weight" , | |
"trans_loss_weight" , | |
"drlim_loss_margin" , | |
"trans_loss_margin" , | |
"weight_decay" , | |
"max_iter" , | |
"snapshot" , | |
"stepsize" , | |
"gamma" , | |
"type" , | |
"momentum" , | |
"momentum2" , | |
"random_seed" | |
]; | |
csvwriter= csv.DictWriter(csvfile, fieldnames=fieldnames); | |
csvwriter.writeheader() | |
for jobno in range(args.numjobs): | |
if jobno>0: | |
print("Delay..."); | |
time.sleep(5); | |
clusterno=clusterno+1; | |
print ("Creating files for job clusterno %d" % clusterno); | |
solver_file="../condor/" + str(clusterno)+'_solver.prototxt' | |
trainnet_file="../condor/" + str(clusterno) + '_trainnet.prototxt' | |
testnet_file="../condor/" + str(clusterno) + '_testnet.prototxt' | |
deploynet_file="../condor/" + str(clusterno) + '_deploynet.prototxt' | |
if args.config=='drlim' or args.config=='ved_drlim': | |
#pretrain=True | |
solver=base_solver; | |
solver.sp["random_seed"]=str(clusterno); | |
solver.sp["train_net"]= '"'+trainnet_file+'"'; | |
solver.sp["test_net"]= '"'+testnet_file+'"'; | |
solver.sp["snapshot_prefix"]='"'+"../caffe_snapshots/"+ str(clusterno) + "_snap"+'"'; | |
solver.sp["base_lr"]= str(learning_rates[jobno]); | |
def net_phase(phase): # phase == 'train', 'test' or 'deploy' | |
assert(phase=='train' or | |
phase=='test' or | |
phase=='deploy' | |
) | |
net=caffe.NetSpec() | |
## Data layers ######################## | |
if phase=='train': | |
if not dynamicCrop_flag: | |
net["pair_data"]=L.Data( | |
source="../kitti_codes/KITTI_files/trn_trans_pairs_full-clust6_3-nbd10/" , | |
transform_param=dict( | |
mean_value=[104,117,123,104,117,123]), | |
batch_size=pair_batchsize, | |
ntop=1, | |
backend=P.Data.LMDB, | |
#include=dict(phase=caffe.TRAIN), | |
name="pair_kitti_data_trn"); | |
net["trans_labelvec"]=L.HDF5Data( | |
source="../kitti_codes/trn_full_trans_pairs-clust6_3-nbd10_labels.txt", | |
batch_size=pair_batchsize, | |
ntop=1, | |
#include=dict(phase=caffe.TRAIN), | |
name="pair_kitti_label_trn" | |
) | |
else: | |
net["pair_data"]=L.Data( | |
source="../kitti_codes/KITTI/trn_trans_pairs_370x1226_clust6_3-nbd7/" , | |
transform_param=dict( | |
mirror=False, | |
crop_size=227, | |
mean_value=[104,117,123,104,117,123]), | |
batch_size=pair_batchsize, | |
ntop=1, | |
backend=P.Data.LMDB, | |
#include=dict(phase=caffe.TRAIN), | |
name="pair_kitti_data_trn"); | |
net["trans_labelvec"]=L.HDF5Data( | |
source="../kitti_codes/trn_370x1226_trans_pairs-clust6_3-nbd7_labels.txt", | |
batch_size=pair_batchsize, | |
ntop=1, | |
#include=dict(phase=caffe.TRAIN), | |
name="pair_kitti_label_trn" | |
) | |
elif phase=='test': | |
if not dynamicCrop_flag: | |
net["pair_data"]=L.Data( | |
source="../kitti_codes/KITTI_files/trn_trans_pairs_full-clust6_3-nbd10/" , | |
transform_param=dict( | |
mean_value=[104,117,123,104,117,123]), | |
batch_size=pair_batchsize, | |
ntop=1, | |
backend=P.Data.LMDB, | |
#include=dict(phase=caffe.TRAIN), | |
name="pair_kitti_data_tst"); | |
net["trans_labelvec"]=L.HDF5Data( | |
source="../kitti_codes/trn_full_trans_pairs-clust6_3-nbd10_labels.txt", | |
batch_size=pair_batchsize, | |
ntop=1, | |
#include=dict(phase=caffe.TRAIN), | |
name="pair_kitti_label_tst" | |
) | |
else: | |
net["pair_data"]=L.Data( | |
source="../kitti_codes/KITTI/tst_trans_pairs_370x1226_clust6_3-nbd7/" , | |
transform_param=dict( | |
mirror=False, | |
crop_size=227, | |
mean_value=[104,117,123,104,117,123]), | |
batch_size=pair_batchsize, | |
ntop=1, | |
backend=P.Data.LMDB, | |
#include=dict(phase=caffe.TRAIN), | |
name="pair_kitti_data_tst"); | |
net["trans_labelvec"]=L.HDF5Data( | |
source="../kitti_codes/tst_370x1226_trans_pairs-clust6_3-nbd7_labels.txt", | |
batch_size=pair_batchsize, | |
ntop=1, | |
#include=dict(phase=caffe.TRAIN), | |
name="pair_kitti_label_tst" | |
) | |
#net["pair_data"], net["pair_labelvec"] = L.DummyData(name="dummy_pair_data", | |
# ntop=2, | |
# shape=[dict(dim=[pair_batchsize, 6, 227, 227]), dict(dim=[pair_batchsize, 1, 1, 7])] | |
# ); | |
if phase=='train' or phase == 'test': # no deploy | |
net["a_data"], net["b_data"] = L.Slice( | |
net["pair_data"], name="slice_data", | |
slice_param=dict( | |
slice_dim=1, | |
slice_point=[3]), | |
ntop=2 | |
); | |
net["sim_label"], net["trans_label1"], net["trans_label2"], net["trans_label3"], net["trans_mot_labels"] = L.Slice( | |
net["trans_labelvec"], | |
name="slice_pair_label", | |
slice_param=dict( | |
slice_point=[1, 2, 3, 4] | |
), | |
ntop=5, | |
) | |
net["silent"]=L.Silence( | |
net["sim_label"], | |
net["trans_label1"], | |
net["trans_label2"], | |
net["trans_label3"], | |
net["trans_mot_labels"], | |
ntop=0 | |
) | |
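# pair_data stacks the two RGB frames channel-wise (hence the 6-entry
# mean_value above), so slicing at channel 3 recovers the a/b images.
# trans_labelvec is sliced into a similarity bit, three per-transform
# labels, and the remaining motion labels (ntop=5); the Silence layer just
# absorbs them here so Caffe does not warn about unconsumed tops, while the
# loss layers below read the same blobs.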
## Siamese network ######################## | |
net=LS.generate_conv1_to_bn6( | |
net, | |
blob_prefix="a_", | |
layer_prefix="a_", | |
param_prefix="shared_", | |
bottom_blob="a_data", | |
top_blob="a_top", | |
num_dropouts=0, | |
learn_all=True, | |
in_place_pool5=False | |
); | |
net=LS.generate_conv1_to_bn6( | |
net, | |
blob_prefix="b_", | |
layer_prefix="b_", | |
param_prefix="shared_", | |
bottom_blob="b_data", | |
top_blob="b_top", | |
num_dropouts=0, | |
learn_all=True, | |
in_place_pool5=False | |
); | |
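# The a_/b_ stacks form a siamese pair: both are built with
# param_prefix="shared_", so their layers refer to the same named Caffe
# parameters and the weights are tied.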
## drlim loss ######################## | |
net=LS.generate_contrastive_loss( | |
net, | |
blob_prefix="drlim_", | |
layer_prefix="drlim_", | |
param_prefix="", | |
bottom_blob=["a_top", "b_top", "sim_label"], | |
loss_weight=drlim_loss_weights[jobno], | |
loss_margin=drlim_loss_margins[jobno], | |
learn_all=True | |
); | |
## equivariance loss ######################## | |
if args.config=='ved_drlim': | |
if not nonDiscrete_flag: | |
num_transforms=3; | |
for i in range(num_transforms): | |
net=LS.generate_equivariant_map( | |
net, | |
blob_prefix="trans"+str(i+1)+"_", | |
layer_prefix="trans"+str(i+1)+"_", | |
bottom_blob="b_top", | |
top_blob="c_top" + str(i+1), | |
bottleneck_size=128, | |
nonDiscrete_flag=nonDiscrete_flag, | |
learn_all=True | |
) | |
net=LS.generate_contrastive_loss( | |
net, | |
blob_prefix="trans_", | |
blob_suffix=str(i+1), | |
layer_prefix="trans_", | |
layer_suffix=str(i+1), | |
bottom_blob=["a_top", "c_top" + str(i+1), "trans_label" + str(i+1)], | |
loss_weight=trans_loss_weights[jobno], | |
loss_margin=trans_loss_margins[jobno], | |
learn_all=True | |
) | |
else: | |
net=LS.generate_equivariant_map( | |
net, | |
blob_prefix="trans_", | |
layer_prefix="trans_", | |
bottom_blob="b_top", | |
top_blob="c_top", | |
bottleneck_size=128, | |
nonDiscrete_flag=nonDiscrete_flag, | |
motion_blob="trans_mot_labels", | |
learn_all=True | |
) | |
net["trans_loss"]=L.EuclideanLoss( | |
net["a_top"], | |
net["c_top"], | |
loss_weight=trans_loss_weights[jobno] | |
) | |
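# Equivariance loss, two variants: with discrete transforms each of the
# num_transforms clusters gets its own equivariant map plus a contrastive
# loss against the paired frame's feature; the nonDiscrete variant
# conditions a single map on the raw motion labels and penalizes the
# Euclidean distance to a_top directly.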
## classification pipeline (for either monitoring, or training) | |
if phase == 'train': | |
net["cls_data"], net["cls_label"]=L.Data( | |
source="../kitti_codes/SUN/pulkit_lmdbs/sun_imSz256_ntpc5_run1_train-lmdb" , | |
transform_param=dict( | |
mirror=True, | |
crop_size=227, | |
mean_value=[104,117,123]), | |
batch_size=cls_batchsize, | |
ntop=2, | |
backend=P.Data.LMDB, | |
#include=dict(phase=caffe.TRAIN), | |
name="cls_sun_trn"); | |
elif phase=='test': | |
net["cls_data"], net["cls_label"]=L.Data( | |
source="../kitti_codes/SUN/pulkit_lmdbs/sun_imSz256_ntpc50_run1_test-lmdb/" , | |
transform_param=dict( | |
mirror=False, | |
crop_size=227, | |
mean_value=[104,117,123]), | |
batch_size=cls_batchsize, | |
ntop=2, | |
backend=P.Data.LMDB, | |
#include=dict(phase=caffe.TEST), | |
name="cls_sun_tst"); | |
elif phase=='deploy': | |
net["cls_data"]=L.Input( | |
input_param=dict( | |
shape=dict(dim=[1,3,227,227]) | |
), | |
name="cls_sun_deploy" | |
) | |
#net["cls_data"], net["cls_label"] = L.DummyData(name="dummy_cls_data", | |
# ntop=2, | |
# shape=[dict(dim=[cls_batchsize,3,227,227]), dict(dim=[cls_batchsize, 1])] | |
# ) | |
if phase=='train' or phase== 'test' or phase== 'deploy': # trivially satisfied since there is a previous assert, but just in case something changes later | |
net=LS.generate_conv1_to_bn6( | |
net, | |
blob_prefix="cls_", | |
layer_prefix="", | |
param_prefix="shared_", | |
bottom_blob="cls_data", | |
top_blob="cls_bn6", | |
num_dropouts=1 if args.num_dropouts>=2 else 0, | |
#learn_all=False if args.pretrain else True | |
learn_all=True | |
) | |
if phase=='train' or phase== 'test': | |
#L6 | |
net=LS.generate_classifier( | |
net, | |
blob_prefix="L6_", | |
layer_prefix="L6_", | |
param_prefix="", | |
bottom_blob=["cls_bn6", "cls_label"], | |
learn_all=True, | |
propagate_down=True if (not args.pretrain and cls_loss_level=='L6') else False, # cls_loss_level is assumed to be defined upstream when --finetune is used
num_cls=num_cls, | |
loss_weight=cls_loss_weights[jobno], | |
loss_name="cls_L6_loss", | |
acc_name="cls_L6_acc", | |
num_dropouts=1 if args.num_dropouts>=1 else 0, | |
) | |
#L3 | |
net=LS.generate_classifier( | |
net, | |
blob_prefix="L3_", | |
layer_prefix="L3_", | |
param_prefix="", | |
bottom_blob=["cls_bn3", "cls_label"], | |
learn_all=True, | |
propagate_down=True if (not args.pretrain and cls_loss_level=='L3') else False, | |
num_cls=num_cls, | |
loss_weight=cls_loss_weights[jobno], | |
loss_name="cls_L3_loss", | |
acc_name="cls_L3_acc", | |
) | |
return net | |
else: | |
raise Exception('config %s not handled yet' % args.config)
print ("Solver: %s" % solver_file); | |
solver.write(solver_file); | |
train_net=net_phase('train'); | |
test_net=net_phase('test'); | |
deploy_net=net_phase('deploy'); | |
print ("Train net: %s" % trainnet_file); | |
with open(trainnet_file, 'w') as f: | |
f.write(str(train_net.to_proto())); | |
print ("Test net: %s" % testnet_file); | |
with open(testnet_file, 'w') as f: | |
f.write(str(test_net.to_proto())); | |
print ("Deploy net: %s" % deploynet_file); | |
with open(deploynet_file, 'w') as f: | |
f.write(str(deploy_net.to_proto())); | |
if args.debug: | |
try: | |
net=caffe.Net(trainnet_file, caffe.TRAIN); | |
net=caffe.Net(testnet_file, caffe.TEST); | |
net=caffe.Net(deploynet_file, caffe.TEST); | |
except: | |
raise Exception("network file raises error"); | |
# store job details to csv | |
job_signature = {}; | |
job_signature["jobno"] = clusterno | |
job_signature["config"] = args.config | |
job_signature["base_lr"] = solver.sp["base_lr"] | |
job_signature["nonDiscrete_flag"] = nonDiscrete_flag | |
job_signature["dynamicCrop_flag"] = dynamicCrop_flag | |
job_signature["pair_batchsize"] = pair_batchsize | |
job_signature["cls_batchsize"] = cls_batchsize | |
job_signature["cls_loss_weight"] = cls_loss_weights[jobno] | |
job_signature["drlim_loss_weight"] = drlim_loss_weights[jobno] | |
job_signature["trans_loss_weight"] = trans_loss_weights[jobno] | |
job_signature["drlim_loss_margin"] = drlim_loss_margins[jobno] | |
job_signature["trans_loss_margin"] = trans_loss_margins[jobno] | |
job_signature["weight_decay"] = solver.sp["weight_decay"] | |
job_signature["max_iter"] = solver.sp["max_iter"] | |
job_signature["snapshot"] = solver.sp["snapshot"] | |
job_signature["stepsize"] = solver.sp["stepsize"] | |
job_signature["gamma"] = solver.sp["gamma"] | |
job_signature["momentum"] = solver.sp["momentum"] | |
job_signature["momentum2"] = solver.sp["momentum2"] | |
job_signature["type"] = solver.sp["type"] | |
job_signature["random_seed"] = solver.sp["random_seed"] | |
csvwriter.writerow(job_signature); | |
print("Job signature stored to %s" % 'hyperparams.csv') | |
# submit job | |
if hpcsystem=="condor": | |
condor_submitFile='../condor/%d.condor_submit' % clusterno | |
print("Condor submit file: %s" % condor_submitFile); | |
with open(condor_submitFile, 'w') as file: | |
file.write('+Group="GRAD"\n'); | |
file.write('+Project="AI_ROBOTICS"\n'); | |
file.write('+ProjectDescription=""\n'); | |
file.write('+GPUJOB=true\n'); | |
file.write('Requirements=TARGET.GPUSlot\n'); | |
file.write('Environment=LD_LIBRARY_PATH=/scratch/vision/dineshj/caffe_vision_extra//leveldb/:/scratch/vision/dineshj/caffe_vision_extra//snappy/install/lib/:/scratch/vision/dineshj/caffe_vision_extra//OpenBLAS/build//lib/:/scratch/vision/dineshj/caffe_vision_extra//glog-0.3.3/install/lib/:/scratch/vision/dineshj/caffe_vision_extra//gflags/build/lib/:/scratch/vision/dineshj/caffe_vision_extra//lmdb/:/scratch/vision/dineshj/caffe_vision_extra//protobuf/install//lib/:/lusr/opt/boost-1.54/lib/:/opt/cuda-7.0/lib64/:/opt/cuda-7.0/nvvm/libdevice/:/usr/:/scratch/vision/dineshj/caffe_vision_extra/hdf5-1.8.15-patch1/install/lib/:/usr/lib/x86_64-linux-gnu/:/scratch/vision/dineshj/caffe_vis/build/lib/:/lib/x86_64-linux-gnu/:/v/filer4b/software/matlab-r2015b/bin/glnxa64/:/v/filer4b/software/matlab-r2015b/runtime/glnxa64/:/vision/vision_users/dineshj/local_installs/lib/;\n'); | |
file.write('Universe = vanilla\n'); | |
file.write('Getenv = True\n'); | |
file.write('Log = ../condor/%d.log\n' % clusterno); | |
file.write('Output = ../condor/%d.out\n' % clusterno); | |
file.write('Error = ../condor/%d.err\n' % clusterno); | |
file.write('Notification = Complete\n'); | |
file.write('Executable=../caffe2/tools/caffe\n'); | |
file.write('Arguments = train -gpu 0'); | |
file.write(' -solver %s' % solver_file); | |
if resume_flag: | |
file.write(' -snapshot %s' % snapshot) | |
if (not resume_flag) and finetune_flag: | |
file.write(' -weights %s' % weights) | |
file.write('\nQueue 1'); | |
if not args.submit: | |
debug_here() | |
retcode=call("condor_submit %s 2> /dev/null" % condor_submitFile, shell=True) | |
elif hpcsystem=="slurm": | |
slurm_submitFile='../condor/%d.slurm_submit' % clusterno | |
try: | |
runtime | |
except: | |
runtime=20 | |
print ("Slurm submit file: %s" % slurm_submitFile); | |
with open(slurm_submitFile,'w') as file: | |
file.write("#!/bin/bash\n"); | |
file.write("#SBATCH -J tyrion\n"); | |
file.write("#SBATCH -o ../condor/%d.err\n" % clusterno) | |
file.write('#SBATCH -p gpu\n'); | |
file.write('#SBATCH -n 1\n'); | |
file.write('#SBATCH -A Visual-Recognition\n'); #Visual-Recognition || CS381V-Visual-Recogn || Fine-Tuning-CNNs | |
file.write('#SBATCH -t 00:%02d:00\n' % runtime); | |
if clustername=="stampede": | |
file.write('time ../caffe2_build_stampede/tools/caffe train -gpu 0'); | |
elif clustername=="maverick": | |
file.write('time ../caffe2/tools/caffe train -gpu 0'); | |
else: | |
raise Exception('clustername %s not handled for slurm submission' % clustername) | |
file.write(' -solver %s' % solver_file); | |
if resume_flag: | |
file.write(' -snapshot %s' % snapshot) | |
if (not resume_flag) and finetune_flag: | |
file.write(' -weights %s' % weights) | |
file.write('\npython nn_eval.py --max_jobs 25 --max_test_images 1000 -k 1 -m %s' % solver.sp["snapshot_prefix"].strip('"')); | |
if not args.submit: | |
debug_here() | |
retcode=call("sbatch " + slurm_submitFile, shell=True) | |
if retcode !=0: | |
sys.stderr.write("Submission command failed with return code %d\n" % retcode)
raise Exception('could not submit job') | |
# then update .lastclusterno
with open('.lastclusterno', 'w') as f: | |
f.write('%d' % clusterno); | |
csvfile.close() | |
if __name__ == "__main__": | |
parser=argparse.ArgumentParser() | |
parser.add_argument('-n', '--numjobs', type=int, | |
default=1, help="number of jobs to submit"); | |
submit_parser = parser.add_mutually_exclusive_group(required=False) | |
submit_parser.add_argument('--submit', dest='submit', action='store_true') | |
submit_parser.add_argument('--nosubmit', dest='submit', action='store_false') | |
parser.set_defaults(submit=True) | |
debug_parser = parser.add_mutually_exclusive_group(required=False) | |
debug_parser.add_argument('--debug', dest='debug', action='store_true') | |
debug_parser.add_argument('--nodebug', dest='debug', action='store_false') | |
parser.set_defaults(debug=False) | |
parser.add_argument('--config', type=str, | |
default="drlim", help="cls | drlim | ved_drlim"); | |
pretrain_parser = parser.add_mutually_exclusive_group(required=False) | |
pretrain_parser.add_argument('--pretrain', dest='pretrain', action='store_true'); | |
pretrain_parser.add_argument('--finetune', dest='pretrain', action='store_false'); | |
parser.set_defaults(pretrain=True) | |
parser.add_argument('--num_dropouts', type=int, | |
default=1, help="number of dropout layers to insert in the classification stack (0, 1, or 2)");
args=parser.parse_args() | |
caffe.set_mode_gpu() | |
main() |
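Both the MATLAB and Python drivers share the same .lastclusterno convention on Slurm hosts. A minimal self-contained sketch of that read-increment-write bookkeeping (illustrative only; no job is submitted):

import os

COUNTER = '.lastclusterno'  # hidden counter file both scripts read and rewrite

def next_clusterno():
    # read the last issued ID (0 if the counter does not exist yet)
    last = int(open(COUNTER).read()) if os.path.exists(COUNTER) else 0
    clusterno = last + 1
    with open(COUNTER, 'w') as f:
        f.write('%d' % clusterno)
    return clusterno

# every per-job filename is then derived from this one ID, e.g.:
jobid = next_clusterno()
print('../condor/%d_solver.prototxt' % jobid)
print('../condor/%d.slurm_submit' % jobid)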
function kitti227_submitjobs(nosubmit, numjobs) | |
% Write a script to submit multiple jobs, by | |
% (1) DONE Modifying finderNet_solver as necessary e.g. net file name: condor/finderNet_%d.prototxt, snapshot name, and dumping it back into a new solver file, | |
% (2) DONE reading in the current version of finderNet.prototxt, modifying it as necessary, and dumping it back into a new prototxt condor/finderNet_%d.prototxt, | |
% (3) DONE reading in the current version of run_learnCNN.condor, modifying it as necessary (new solvername) and dumping it back into a new condor/run_learnCNN_%d.condor | |
% (4) DONE issuing condor_submit condor/run_learnCNN_%d.condor | |
try | |
run('../addLibs.m'); | |
system('hostname > tmp.txt'); | |
if ~isempty(regexp(fileread('tmp.txt'), 'vision', 'once')) || ~isempty(regexp(fileread('tmp.txt'), 'adriana', 'once')) || ~isempty(regexp(fileread('tmp.txt'), 'eldar', 'once')) | |
hpcsystem='condor'; | |
else | |
hpcsystem='slurm'; | |
end | |
if ~isempty(regexp(fileread('tmp.txt'), 'maverick', 'once')) | |
clustername='maverick'; | |
elseif ~isempty(regexp(fileread('tmp.txt'), 'stampede', 'once')) | |
clustername='stampede'; | |
else | |
clustername='condor'; | |
end | |
if nargin<1, nosubmit=true; end
if nargin<2, numjobs=1; end
no_test=false; | |
%% Declaring solver base parameters | |
input_dim={'1','3','227','227'}; | |
hpcjob.hide.resume_flag=false; | |
hpcjob.snapshot_clust=1e6+(3626:3635); | |
hpcjob.snapshot_iter=15000*ones(1,numel(hpcjob.snapshot_clust)); | |
hpcjob.hide.finetune_flag=false; | |
%starting_model_file='/work/01932/dineshj/caffe2/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'; | |
%if hpcjob.hide.finetune_flag | |
% switch clustername | |
% case {'stampede', 'maverick'} | |
% hpcjob.model='/work/01932/dineshj/caffe2/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'; | |
% case 'condor' | |
% hpcjob.model='/scratch/vision/dineshj/caffe2/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'; | |
% end | |
%end | |
config='ved_drlim'; | |
switch config | |
case 'cls' | |
hpcjob.hide.finetune_flag=true; new_layers={'"cls_prefinal"','"cls_final"'}; finetune_lr_slowdown_factor=10; | |
no_test=false; | |
if strcmp(hpcsystem, 'condor') | |
no_test=true; % to keep things fast | |
end | |
num_cls=397; | |
solver_base=readPrototxt('../kitti_codes/solver_227.prototxt') | |
net_base=readPrototxt('../kitti_codes/clsnet_227.prototxt') | |
%cls_trn_sfx='_mini'; solver_base.max_iter=num2str(3e3); solver_base.snapshot=num2str(1.5e3); time=10; | |
cls_trn_sfx='_5'; solver_base.max_iter=num2str(2e4); solver_base.snapshot=num2str(2e3); time=240; | |
%cls_trn_sfx='_50'; solver_base.max_iter=num2str(1e4); solver_base.snapshot=num2str(2e3); time=150; | |
solver_base.stepsize=num2str(2e4); | |
batch_size=num2str(128); | |
solver_base.weight_decay=num2str(0); | |
solver_base.type='"SGD"'; | |
switch solver_base.type | |
case '"SGD"' | |
%loss_weights=10.^[-4:0]; | |
%loss_weights=10.^-1*ones(1,5); | |
%loss_weights=10.^[0:-0.5:-4.5]; | |
loss_weights=10.^[2.0:-0.5:0]; | |
case '"Adagrad"' | |
loss_weights=10.^[-2:+2]; | |
%loss_weights=10.^[-5:-3, +3:+4]; | |
otherwise | |
error('Unhandled solver type %s', solver_base.type);
end | |
case {'cls_L3','cls_L5', 'cls_L6'} | |
new_layers={'"cls_final"'}; finetune_lr_slowdown_factor=10; | |
hpcjob.hide.finetune_flag=true; | |
hpcjob.model_clust=1e6+(3701:3710); | |
hpcjob.model_iter=10000*ones(1,numel(hpcjob.model_clust)); | |
no_test=false; | |
%if strcmp(hpcsystem, 'condor') | |
% no_test=true; % to keep things fast | |
%end | |
num_cls=397; | |
solver_base=readPrototxt('../kitti_codes/solver_227.prototxt') | |
if strcmp(config, 'cls_L5'), net_base=readPrototxt('../kitti_codes/clsnet_227_L5.prototxt'); | |
elseif strcmp(config, 'cls_L6'), net_base=readPrototxt('../kitti_codes/clsnet_227_L6.prototxt'); | |
elseif strcmp(config, 'cls_L3'), net_base=readPrototxt('../kitti_codes/clsnet_227_L3.prototxt'); | |
else, error('Unknown config %s', config); end
%cls_trn_sfx='_mini'; solver_base.max_iter=num2str(3e3); solver_base.snapshot=num2str(1.5e3); time=10; | |
cls_trn_sfx='_ntpc5_run1_train-lmdb'; solver_base.max_iter=num2str(2e4); solver_base.snapshot=num2str(2e3); time=600; | |
%cls_trn_sfx='_50'; solver_base.max_iter=num2str(1e4); solver_base.snapshot=num2str(2e3); time=150; | |
solver_base.momentum=num2str(0.9); | |
%solver_base.momentum2=num2str(0.999); | |
solver_base.lr_policy='"step"'; | |
solver_base.stepsize=num2str(5000); | |
solver_base.gamma=num2str(0.5); | |
solver_base.weight_decay =num2str(0.005); | |
batch_size=num2str(128); | |
solver_base.display=num2str(20); | |
solver_base.type='"SGD"'; | |
%solver_base.type='"Adam"'; | |
switch solver_base.type | |
case '"SGD"' | |
%loss_weights=10.^[-4:0]; | |
%loss_weights=10.^-1*ones(1,5); | |
%loss_weights=10.^[0:-0.5:-4.5]; | |
%loss_weights=10.^[1:-0.5:-3.5]; | |
loss_weights=ones(1,10); | |
case '"Adagrad"' | |
loss_weights=ones(1,10); | |
%loss_weights=10.^[-5:-3, +3:+4]; | |
case '"Adam"' | |
loss_weights=ones(1,10); | |
otherwise | |
error('Unhandled solver type %s', solver_base.type);
end | |
switch hpcjob.hide.finetune_flag | |
case true | |
solver_base.weight_decay=num2str(5e-4); | |
solver_base.base_lr=num2str(1e-3); | |
solver_base.max_iter=num2str(3e4); | |
solver_base.snapshot=num2str(2e4); | |
solver_base.test_interval=num2str(1e2); | |
case false | |
hpcjob.hide.model=''; | |
end | |
learning_rates=1e-2*ones(1,numjobs); | |
hpcjob.target_output='cls_accuracy'; hpcjob.target_drn='h'; hpcjob.target_perfect=num2str(1.0); | |
hpcjob.overfit_margin=num2str(0.02); | |
hpcjob.saturation_wait=num2str(5e3); | |
case 'drlim' | |
no_test=false; | |
withcls=1; cls_weight=1; | |
net_base=readPrototxt('../kitti_codes/drlimnet_227_L6.prototxt'); | |
solver_base=readPrototxt('../kitti_codes/solver_227.prototxt') | |
cls_trn_sfx='_ntpc5_run1_train-lmdb'; solver_base.max_iter=num2str(1e4); solver_base.snapshot=num2str(2e3); time=720; | |
batch_size=num2str(16); | |
solver_base.display=num2str(100); | |
solver_base.weight_decay=num2str(0); | |
%loss_weights=10.^[-3:0.15:-1.65]; | |
%loss_weights=10.^-[-2.4:0.2:-1.6]; | |
loss_weights=zeros(1,5); %10.^-2*ones(1,5); | |
learning_rates=1e-2*ones(1,numel(loss_weights)); | |
hpcjob.target_output='cls_accuracy'; hpcjob.target_drn='h'; hpcjob.target_perfect=num2str(1.0); | |
hpcjob.overfit_margin=num2str(0.02); | |
hpcjob.saturation_wait=num2str(5e3); | |
case 'vednet' | |
no_test=false; | |
withcls=1; cls_weight=1; | |
%withcls=1; cls_trn_sfx='_1e-1_SUN'; num_cls=397; cls_weight=10^0;% for SUN | |
% | |
net_base=readPrototxt('../kitti_codes/equivnet_227_L6.prototxt'); | |
solver_base=readPrototxt('../kitti_codes/solver_227.prototxt') | |
cls_trn_sfx='_ntpc5_run1_train-lmdb'; solver_base.max_iter=num2str(1e4); solver_base.snapshot=num2str(2e3); time=720; | |
batch_size=num2str(16); | |
solver_base.display=num2str(100); | |
solver_base.weight_decay=num2str(0); | |
%loss_weights=10.^[-3:0.15:-1.65]; | |
%loss_weights=10.^-[-2.4:0.2:-1.6]; | |
learning_rates=logspace(-4, -2, 5) | |
loss_weights=zeros(1,5); %10.^-2*ones(1,5); | |
transforms=repmat([1,2,3], numel(loss_weights), 1); | |
hpcjob.target_output='cls_accuracy'; hpcjob.target_drn='h'; hpcjob.target_perfect=num2str(1.0); | |
hpcjob.overfit_margin=num2str(0.02); | |
hpcjob.saturation_wait=num2str(5e3); | |
case 'ved_drlim' | |
no_test=false; | |
withcls=1; cls_weight=10; | |
num_cls=397; | |
pretrain=0; | |
%withcls=1; cls_trn_sfx='_1e-1_SUN'; num_cls=397; cls_weight=10^0;% for SUN | |
% | |
%net_base=readPrototxt('../kitti_codes/equivdrlim_227_L6.prototxt'); | |
solver_base=readPrototxt('../kitti_codes/solver_227.prototxt') | |
if ~pretrain | |
%net_base=readPrototxt('../kitti_codes/equivdrlim_227_L6.prototxt'); | |
net_base=readPrototxt('../kitti_codes/equivdrlim_227_L3.prototxt'); | |
batch_size=num2str(32); | |
cls_trn_sfx='_ntpc5_run1_train-lmdb'; | |
solver_base.max_iter=num2str(2e3); solver_base.snapshot=num2str(5e4); time=600; | |
solver_base.test_interval=num2str(500); | |
solver_base.test_iter=num2str(40); | |
solver_base.type='"Adam"'; | |
solver_base.momentum=num2str(0.9); | |
solver_base.momentum2=num2str(0.999); | |
solver_base.lr_policy='"step"'; | |
solver_base.stepsize=num2str(5000); | |
solver_base.gamma=num2str(0.5); | |
solver_base.weight_decay =num2str(0.005); | |
else | |
net_base=readPrototxt('../kitti_codes/equivdrlim_227_pretrain.prototxt'); | |
batch_size=num2str(64); | |
solver_base.max_iter=num2str(4e4); solver_base.snapshot=num2str(1e4); time=600; | |
solver_base.test_interval=num2str(500); | |
solver_base.test_iter=num2str(10); | |
solver_base.type='"Adam"'; | |
solver_base.momentum=num2str(0.9); | |
solver_base.momentum2=num2str(0.999); | |
solver_base.lr_policy='"step"'; | |
solver_base.stepsize=num2str(5000); | |
solver_base.gamma=num2str(0.5); | |
solver_base.weight_decay =num2str(0.005); | |
end | |
hpcjob.max_time=num2str(time); | |
hpcjob.solver_max_iter=solver_base.max_iter; | |
solver_base.display=num2str(20); hpcjob.solver_display=solver_base.display; | |
hpcjob.solver_weight_decay=solver_base.weight_decay; | |
%loss_weights=10.^[-3:0.15:-1.65]; | |
%loss_weights=10.^-[-2.4:0.2:-1.6]; | |
n=numjobs; | |
%learning_rates=1e-3*ones(1,n); | |
%learning_rates=logspace(-8,-4,n); | |
learning_rates=1e-4*ones(1,n); % one per job (a scalar would break learning_rates(i) below for numjobs>1)
trans_loss_weights=zeros(1, n); %10.^-2*ones(1,5); | |
%trans_loss_weights=ones(1,n); %10.^-2*ones(1,5); | |
trans_loss_margins=100*ones(1, n); %10.^-2*ones(1,5); | |
%drlim_loss_weights=0; %logspace(-7, -2, n); %10.^-2*ones(1,5); | |
%drlim_loss_weights=10.^[-10,-9,-8,-6,-5.5]; %logspace(-7, -2, n); %10.^-2*ones(1,5); | |
drlim_loss_weights=ones(1,numjobs); %10.^-2*ones(1,5); | |
%drlim_loss_weights=logspace(-5,-1,numjobs); %10.^-2*ones(1,5); | |
drlim_loss_margins=100*ones(1,n); % 0 means ignore negatives... | |
%drlim_loss_margins=logspace(-1,+3,n); | |
transforms=repmat([1,2,3], n, 1); | |
hpcjob.target_output='cls_accuracy'; hpcjob.target_drn='h'; hpcjob.target_perfect=num2str(1.0); | |
hpcjob.overfit_margin=num2str(0.02); | |
hpcjob.saturation_wait=num2str(5e3); | |
%case 'triplim' | |
% config = 'triplim'; | |
% triplim_sfx='_nbd20'; | |
% drlim_sfx='_nbd20'; | |
% clsdata='SUN'; | |
% solver_base=readPrototxt('../kitti_codes/solver.prototxt'); | |
% net_base=readPrototxt('../kitti_codes/triplim_net.prototxt'); | |
% switch clsdata | |
% case 'SUN' | |
% withcls=1; cls_trn_sfx='_1e-1_SUN'; num_cls=397; | |
% batch_size=num2str(128); solver_base.max_iter=num2str(1.6e4); time=150; solver_base.snapshot=num2str(2e3); | |
% solver_base.display=num2str(10); solver_base.test_interval=num2str(50); | |
% triplim_loss_weights=10.^[-3.5*ones(1,5) -2.5*ones(1,5) -2*ones(1,5)]; triplim_margins=0.1*ones(1,numel(triplim_loss_weights)); | |
% drlim_loss_weights=10.^0.5*ones(1,numel(triplim_loss_weights)); | |
% case 'KITTI' | |
% NOT_IMPLEMENTED | |
% otherwise | |
% error('Unknown cls dataset %s', clsdata); | |
% end | |
% solver_base.test_iter='1'; | |
% solver_base.type='"Nesterov"'; | |
% solver_base.weight_decay=num2str(0); | |
otherwise | |
error('Unknown configuration %s\n', config); | |
end | |
%switch solver_base.type | |
% case '"SGD"' | |
% case '"Adagrad"' | |
% solver_base.momentum=num2str(0); | |
% case '"Nesterov"' | |
% otherwise | |
% error('Unknown solver type'); | |
%end | |
%hpcjob.hide.model='../caffe_snapshots/1000266_norb_slowpred_fc_iter_50000.caffemodel'; | |
hpcjob.hide.useGPU=true; | |
if strcmp(hpcsystem,'slurm') | |
hpcjob.sys_cmd='"rsync -avrz -e ssh ../clust_runs/ [email protected]:/scratch/vision/dineshj/active/clust_runs/"'; | |
end | |
%hpcjob.numjobs=numjobs; | |
if strcmp(hpcsystem, 'condor') | |
% get clusterno through condor_q or condor_history commands | |
clusterno=-1; | |
system('condor_q -format "%d\n" ClusterId > tmp.txt 2> /dev/null'); | |
allclust=load('tmp.txt'); | |
if ~isempty(allclust) | |
clusterno=max(max(allclust), clusterno); | |
end | |
system('condor_history -format "%d\n" ClusterId 2> /dev/null | head -35 > tmp.txt'); | |
allclust=load('tmp.txt'); | |
if ~isempty(allclust) | |
clusterno=max(max(allclust), clusterno); | |
end | |
if clusterno==-1 | |
clusterno=0; | |
end | |
else % do the next best thing - assign another unique ID | |
origclusterno=load('.lastclusterno'); | |
clusterno=origclusterno; | |
end | |
%if hpcjob.hide.finetune_flag | |
% solver_base.base_lr=num2str(str2num(solver_base.base_lr)/finetune_lr_slowdown_factor); | |
% %hpcjob.hide.model=starting_model_file; | |
%end | |
for i=1:numjobs | |
if hpcjob.hide.resume_flag | |
hpcjob.snapshot=sprintf('../caffe_snapshots/%d_snap_iter_%d.solverstate', hpcjob.snapshot_clust(i), hpcjob.snapshot_iter(i)); | |
end | |
if hpcjob.hide.finetune_flag | |
hpcjob.model=sprintf('../caffe_snapshots/%d_snap_iter_%d.caffemodel', hpcjob.model_clust(i), hpcjob.model_iter(i)); | |
end | |
clusterno=clusterno+1; | |
solver=solver_base; | |
% change some parameter of solver based on value of i | |
%solver.snapshot_prefix=sprintf('"../caffe_snapshots/%d_norb_drlim_fc"', clusterno); | |
tmp=solver.snapshot_prefix; | |
tmp=strrep(tmp,'"',''); | |
[path, name, ~] =fileparts(tmp); | |
solver.snapshot_prefix=sprintf('"%s/%d_%s"', path, clusterno, name); | |
solver.net=sprintf('"../condor/%s_net%d.prototxt"', hpcsystem, clusterno); | |
if exist('learning_rates', 'var') | |
solver.base_lr=num2str(learning_rates(i)); | |
end | |
hpcjob.prefix=sprintf('../condor/%d', clusterno); | |
hpcjob.solver_snapshot_prefix=solver.snapshot_prefix; | |
fprintf('\n Selecting parameter combination #'); | |
%params.Dep.index = params.process; | |
%fprintf('%d(+1) of %d\n\n', i, size(combinations,1)); | |
%assert(i<=size(combinations,1) && i>0); | |
%for paramno=1:length(paramNames) | |
% cmd=sprintf('%s=combinations{i,%d};', paramNames{paramno}, paramno) | |
% eval(cmd); | |
%end | |
%dbstack; keyboard; | |
% dump solver into condor/solver%d_%d.prototxt | |
if no_test | |
solver.test_iter=num2str(0); | |
solver.test_interval=num2str(str2num(solver.max_iter)+2000); | |
end | |
currSolverFile=sprintf('../condor/%s_solver%d.prototxt', hpcsystem, clusterno); | |
%struct2proto(solver, currSolverFile); | |
writePrototxt(solver, currSolverFile); | |
currSolverFile | |
net = net_base; | |
layernames=cellfun(@(x) x.name, net.layer, 'UniformOutput', false); | |
% dump feat and deploy prototxts (may be changed later if necessary) | |
if exist('feat_file', 'var') | |
copyfile(feat_file, sprintf('../condor/%d_feat.prototxt', clusterno)); | |
else | |
fprintf('Generating feat file\n'); | |
writePrototxt(make_feat_net_new(net, input_dim), sprintf('../condor/%d_feat.prototxt', clusterno)); | |
end | |
if exist('deploy_file', 'var') | |
copyfile(deploy_file, sprintf('../condor/%d_deploy.prototxt', clusterno)); | |
else | |
fprintf('Generating deploy file\n'); | |
writePrototxt(make_deploy_net_new(net, input_dim), sprintf('../condor/%d_deploy.prototxt', clusterno)); | |
end | |
switch config | |
case {'cls', 'cls_L3' 'cls_L5', 'cls_L6'} | |
if exist('num_cls', 'var') | |
layerno= strcmp(layernames, '"cls_final"'); | |
net.layer{layerno}.inner_product_param.num_output=num2str(num_cls); | |
end | |
if hpcjob.hide.finetune_flag % setting learning rate higher for new layers | |
for layerno=1:numel(new_layers) | |
layerno=strcmp(layernames, new_layers{layerno}); | |
net.layer{layerno}.param{1}.lr_mult=num2str(str2num(net.layer{layerno}.param{1}.lr_mult)*finetune_lr_slowdown_factor); | |
net.layer{layerno}.param{2}.lr_mult=num2str(str2num(net.layer{layerno}.param{2}.lr_mult)*finetune_lr_slowdown_factor); | |
end | |
end | |
layerno= strcmp(layernames, '"cls_loss"'); | |
net.layer{layerno}.loss_weight=num2str(loss_weights(i)); | |
layerno= strcmp(layernames, '"cls_sun_trn"'); | |
layertypes=cellfun(@(x) x.type, net.layer, 'UniformOutput', false); | |
if strcmp(layertypes(layerno), '"HDF5Data"') | |
net.layer{layerno}.hdf5_data_param.source=sprintf('"../kitti_codes/sun_trn_cls_data_227%s_asp1.txt"', cls_trn_sfx); | |
elseif strcmp(layertypes(layerno), '"Data"') | |
net.layer{layerno}.data_param.source=sprintf('"../kitti_codes/SUN/pulkit_lmdbs/sun_imSz256%s"', cls_trn_sfx); | |
%sprintf('"../kitti_codes/sun_trn_cls_data_227%s_asp1.txt"', cls_trn_sfx); | |
else | |
error('Unexpected type for the cls_sun_trn data layer');
end | |
%net.layer{layerno}.hdf5_data_param.source=sprintf('"../kitti_codes/sun_trn_cls_data_227%s_asp1.txt"', cls_trn_sfx); | |
tmp=find(strcmp(layertypes, '"HDF5Data"')); | |
for layerno=1:length(tmp) | |
net.layer{tmp(layerno)}.hdf5_data_param.batch_size=batch_size; | |
end | |
tmp=find(strcmp(layertypes, '"Data"')); | |
for layerno=1:length(tmp) | |
net.layer{tmp(layerno)}.data_param.batch_size=batch_size; | |
end | |
if no_test==true | |
layerphases=cellfun(@(x) layerphase(x), net.layer, 'UniformOutput', false); | |
train_layers=find(strcmp(layerphases, 'TRAIN')); | |
for lno=train_layers | |
net.layer{lno}=rmfield(net.layer{lno}, 'include'); | |
end | |
net.layer(strcmp(layerphases, 'TEST'))=[]; | |
new_layerphases=cellfun(@(x) layerphase(x), net.layer, 'UniformOutput', false); | |
end | |
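% no_test pruning: TEST-phase layers are dropped outright and the include
% clauses are stripped from the remaining TRAIN layers; together with
% pushing test_interval past max_iter above, this makes the solver skip
% evaluation entirely.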
% rewriting deploy and feat prototxts after editing net | |
writePrototxt(make_deploy_net_new(net, input_dim), sprintf('../condor/%d_deploy.prototxt', clusterno)); | |
writePrototxt(make_feat_net_new(net, input_dim), sprintf('../condor/%d_feat.prototxt', clusterno)); | |
case 'drlim' | |
if exist('num_cls', 'var') | |
layerno= strcmp(layernames, '"cls_ip2"'); | |
net.layer{layerno}.inner_product_param.num_output=num2str(num_cls); | |
end | |
layerno= strcmp(layernames, '"drlim_loss"'); | |
net.layer{layerno}.loss_weight=num2str(loss_weights(i)); | |
layerno= strcmp(layernames, '"cls_sun_trn"'); | |
net.layer{layerno}.data_param.source=sprintf('"../kitti_codes/SUN/pulkit_lmdbs/sun_imSz256%s"', cls_trn_sfx); | |
%layerno= strcmp(layernames, '"drlim_kitti_trn"'); | |
%net.layer{layerno}.hdf5_data_param.source=sprintf('"../kitti_codes/trn_sim_pairs%s.txt"', drlim_sfx); | |
%layerno= strcmp(layernames, '"drlim_kitti_tst"'); | |
%net.layer{layerno}.hdf5_data_param.source=sprintf('"../kitti_codes/tst_sim_pairs%s.txt"', drlim_sfx); | |
layertypes=cellfun(@(x) x.type, net.layer, 'UniformOutput', false); | |
tmp=find(strcmp(layertypes, '"HDF5Data"')); | |
for layerno=1:length(tmp) | |
net.layer{tmp(layerno)}.hdf5_data_param.batch_size=batch_size; | |
end | |
layertypes=cellfun(@(x) x.type, net.layer, 'UniformOutput', false); | |
tmp=find(strcmp(layertypes, 'HDF5_DATA')); | |
for layerno=1:length(tmp) | |
net.layer{tmp(layerno)}.hdf5_data_param.batch_size=batch_size; | |
end | |
layerno=find(strcmp(layernames, '"cls_loss"')); | |
if withcls | |
net.layer{layerno}.loss_weight=num2str(cls_weight); % for SUN classes | |
else | |
net.layer{layerno}.loss_weight=num2str(0); | |
end | |
% rewriting deploy and feat prototxts after editing net | |
writePrototxt(make_deploy_net_new(net, input_dim), sprintf('../condor/%d_deploy.prototxt', clusterno)); | |
writePrototxt(make_feat_net_new(net, input_dim), sprintf('../condor/%d_feat.prototxt', clusterno)); | |
case 'vednet' | |
if exist('num_cls', 'var') | |
layerno= strcmp(layernames, '"cls_final"'); | |
net.layer{layerno}.inner_product_param.num_output=num2str(num_cls); | |
end | |
layerno= strcmp(layernames, '"trans_loss@"'); | |
net.layer{layerno}.loss_weight=num2str(loss_weights(i)); % setting loss weight for all transforms uniformly | |
layerno= strcmp(layernames, '"cls_sun_trn"'); | |
net.layer{layerno}.data_param.source=sprintf('"../kitti_codes/SUN/pulkit_lmdbs/sun_imSz256%s"', cls_trn_sfx); | |
layertypes=cellfun(@(x) x.type, net.layer, 'UniformOutput', false); | |
tmp=find(strcmp(layertypes, '"HDF5Data"')); | |
for layerno=1:length(tmp) | |
net.layer{tmp(layerno)}.hdf5_data_param.batch_size=batch_size; | |
end | |
tmp=find(strcmp(layertypes, 'HDF5_DATA')); | |
for layerno=1:length(tmp) | |
net.layer{tmp(layerno)}.hdf5_data_param.batch_size=batch_size; | |
end | |
layerno= strcmp(layernames, '"cls_loss"'); | |
if withcls | |
net.layer{layerno}.loss_weight=num2str(cls_weight); | |
else | |
net.layer{layerno}.loss_weight=num2str(0); | |
end | |
curr_transforms=transforms(i, :); | |
layerno1=find(strcmp(layernames, '"trans1_mod@"'));% point from which to replicate | |
layerno2=find(strcmp(layernames, '"trans_loss@"'));% point up to which to replicate | |
translayer=net.layer(layerno1); | |
baselayer=net.layer(1:layerno1-1); | |
template=net.layer(layerno1:layerno2); % one for each transform | |
if numel(net.layer)>layerno2 | |
clsnet=net.layer(layerno2+1:end); | |
else | |
clsnet={}; | |
end | |
tmp=reshape(template, 1, numel(template)); % making into row array (in case not already) | |
tmp=repmat(tmp, numel(curr_transforms), 1); | |
% adding slice points in label vector corresponding to each transform | |
label_layerno=find(strcmp(layernames, '"trans_slice_label"')); | |
mute_layerno=find(strcmp(layernames, '"trans_mute"')); | |
assert(numel(label_layerno)==1); | |
minlabelvecsize=max(curr_transforms); | |
baselayer{label_layerno}.slice_param.slice_point{1}=num2str(1); | |
baselayer{label_layerno}.top{1}='"sim_label"'; | |
baselayer{mute_layerno}.bottom{1}='"sim_label"'; | |
for labeldim=1:minlabelvecsize % typically 2nd dimension onwards is transform label (1st dimension is similarity) | |
baselayer{label_layerno}.slice_param.slice_point{end+1}=num2str(1+labeldim); | |
baselayer{label_layerno}.top{end+1}=sprintf('"trans_label%d"', labeldim); | |
baselayer{mute_layerno}.bottom{end+1}=sprintf('"trans_label%d"', labeldim); | |
end | |
baselayer{label_layerno}.top{end+1}='"lab_autodummy2"'; | |
baselayer{mute_layerno}.bottom{end+1}='"lab_autodummy2"'; | |
% layertypes=cellfun(@(x) x.type, baselayer, 'UniformOutput', false); | |
% sourcelayer=find(strcmp(layertypes, 'HDF5_DATA')); | |
% assert(~isempty(sourcelayer)) | |
% for sourcelayerno=1:numel(sourcelayer) | |
% val=strtrim(baselayer{sourcelayer(sourcelayerno)}.hdf5_data_param.source); | |
% val=[val(1:end-1) num2str(trans_dsno) val(end)]; % the transformation number must be the last part of the name | |
% baselayer{sourcelayer(sourcelayerno)}.hdf5_data_param.source=val; | |
% end | |
% for transformno=1:numel(curr_transforms) | |
% %val=strtrim(template{sourcelayer(sourcelayerno)}.hdf5_data_param.source); | |
% %val=[val(1:end-1) num2str(curr_transforms(transformno)) val(end)]; % the transformation number must be the last part of the name | |
% end | |
% changing layer and blob names for each transform cluster | |
changeList={'name', 'top', 'bottom'}; % name fields to change for each transform | |
for layerno=1:numel(template) % for each layer | |
fields=fieldnames(template{layerno}); | |
changefieldnos=find(ismember(fields, changeList)); | |
for fldno=1:numel(changefieldnos) | |
fieldno=changefieldnos(fldno); | |
for transformno=1:numel(curr_transforms) % for each transform | |
val=strtrim(template{layerno}.(fields{fieldno})); | |
if iscell(val) % repeated field | |
for repno=1:length(val) | |
if val{repno}(end-1)=='@' % marker for inserting transformno | |
val{repno}=[val{repno}(1:end-2) num2str(curr_transforms(transformno)) val{repno}(end)]; | |
end | |
end | |
else | |
if val(end-1)=='@' % marker for inserting transformno | |
val=[val(1:end-2) num2str(curr_transforms(transformno)) val(end)]; | |
end | |
end | |
tmp{transformno, layerno}.(fields{fieldno})=val; | |
end | |
end | |
end | |
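% e.g. for curr_transforms=[3 7], a template field '"trans_loss@"' becomes | |
% '"trans_loss3"' in the first replica and '"trans_loss7"' in the second; | |
% the '@' just before the closing quote marks where the transform id goes. | |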
tmp=tmp(:); | |
net.layer={}; | |
net.layer(1:numel(baselayer))=baselayer; | |
net.layer(end+(1:numel(tmp)))=tmp; | |
net.layer=[net.layer clsnet]; | |
clear tmp | |
% edit vednet deploy file before dumping | |
deploy=make_deploy_net_new(net, input_dim); | |
translayer=repmat(translayer, 1, numel(curr_transforms)); | |
changeList={'name', 'top'}; | |
for transformno=1:length(translayer) | |
for fieldno=1:length(changeList) | |
val=strtrim(translayer{transformno}.(changeList{fieldno})); | |
translayer{transformno}.(changeList{fieldno})=[val(1:end-2) num2str(curr_transforms(transformno)) val(end)]; | |
end | |
translayer{transformno}.bottom=deploy.layer{end}.top; % should be cls_feat | |
end | |
deploy.layer=[deploy.layer translayer]; | |
writePrototxt(make_feat_net_new(net, input_dim), sprintf('../condor/%d_feat.prototxt', clusterno)); | |
writePrototxt(deploy, sprintf('../condor/%d_deploy.prototxt', clusterno)); | |
case 'ved_drlim' | |
%if ~pretrain | |
if exist('num_cls', 'var') | |
layerno= find(strcmp(layernames, '"cls_final"') | strcmp(layernames, '"cls_L6_final"') | strcmp(layernames, '"cls_L3_final"')); | |
for tmp=1:numel(layerno) | |
net.layer{layerno(tmp)}.inner_product_param.num_output=num2str(num_cls); | |
end | |
end | |
%end | |
layerno= strcmp(layernames, '"drlim_loss"'); | |
net.layer{layerno}.loss_weight=num2str(drlim_loss_weights(i)); | |
net.layer{layerno}.contrastive_loss_param.margin=num2str(drlim_loss_margins(i)); | |
layerno= strcmp(layernames, '"trans_loss@"'); | |
net.layer{layerno}.loss_weight=num2str(trans_loss_weights(i)); % setting loss weight for all transforms uniformly | |
net.layer{layerno}.contrastive_loss_param.margin=num2str(trans_loss_margins(i)); | |
if ~pretrain | |
layerno= strcmp(layernames, '"cls_sun_trn"'); | |
net.layer{layerno}.data_param.source=sprintf('"../kitti_codes/SUN/pulkit_lmdbs/sun_imSz256%s"', cls_trn_sfx); | |
end | |
layertypes=cellfun(@(x) x.type, net.layer, 'UniformOutput', false); | |
tmp=find(strcmp(layertypes, '"HDF5Data"')); | |
for layerno=1:length(tmp) | |
net.layer{tmp(layerno)}.hdf5_data_param.batch_size=batch_size; | |
end | |
tmp=find(strcmp(layertypes, '"Data"')); | |
for layerno=1:length(tmp) | |
if ~isempty(strfind(net.layer{tmp(layerno)}.name, 'cls')) % strfind(text, pattern); findstr is deprecated | |
continue; | |
end | |
net.layer{tmp(layerno)}.data_param.batch_size=batch_size; | |
end | |
%tmp=find(strcmp(layertypes, 'HDF5_DATA')); | |
%for layerno=1:length(tmp) | |
% net.layer{tmp(layerno)}.hdf5_data_param.batch_size=batch_size; | |
%end | |
%if ~pretrain | |
layerno= find(strcmp(layernames, '"cls_loss"') | strcmp(layernames, '"cls_L3_loss"') | strcmp(layernames, '"cls_L6_loss"')); | |
for tmp=1:numel(layerno) | |
net.layer{layerno(tmp)}.loss_weight=num2str(cls_weight); | |
end | |
%end | |
curr_transforms=transforms(i, :); | |
layerno1=find(strcmp(layernames, '"trans1_mod@"'));% point from which to replicate | |
layerno2=find(strcmp(layernames, '"trans_loss@"'));% point up to which to replicate | |
translayer=net.layer(layerno1); | |
baselayer=net.layer(1:layerno1-1); | |
template=net.layer(layerno1:layerno2); % one for each transform | |
if numel(net.layer)>layerno2 | |
clsnet=net.layer(layerno2+1:end); | |
else | |
clsnet={}; | |
end | |
tmp=reshape(template, 1, numel(template)); % making into row array (in case not already) | |
tmp=repmat(tmp, numel(curr_transforms), 1); | |
% adding slice points in label vector corresponding to each transform | |
label_layerno=find(strcmp(layernames, '"trans_slice_label"')); | |
mute_layerno=find(strcmp(layernames, '"trans_mute"')); | |
assert(numel(label_layerno)==1); | |
minlabelvecsize=max(curr_transforms); | |
baselayer{label_layerno}.slice_param.slice_point{1}=num2str(1); | |
baselayer{label_layerno}.top{1}='"sim_label"'; | |
baselayer{mute_layerno}.bottom{1}='"sim_label"'; | |
for labeldim=1:minlabelvecsize % typically 2nd dimension onwards is transform label (1st dimension is similarity) | |
baselayer{label_layerno}.slice_param.slice_point{end+1}=num2str(1+labeldim); | |
baselayer{label_layerno}.top{end+1}=sprintf('"trans_label%d"', labeldim); | |
baselayer{mute_layerno}.bottom{end+1}=sprintf('"trans_label%d"', labeldim); | |
end | |
baselayer{label_layerno}.top{end+1}='"lab_autodummy2"'; | |
baselayer{mute_layerno}.bottom{end+1}='"lab_autodummy2"'; | |
% changing layer and blob names for each transform cluster | |
changeList={'name', 'top', 'bottom'}; % name fields to change for each transform | |
for layerno=1:numel(template) % for each layer | |
fields=fieldnames(template{layerno}); | |
changefieldnos=find(ismember(fields, changeList)); | |
for fldno=1:numel(changefieldnos) | |
fieldno=changefieldnos(fldno); | |
for transformno=1:numel(curr_transforms) % for each transform | |
val=strtrim(template{layerno}.(fields{fieldno})); | |
if iscell(val) % repeated field | |
for repno=1:length(val) | |
if val{repno}(end-1)=='@' % marker for inserting transformno | |
val{repno}=[val{repno}(1:end-2) num2str(curr_transforms(transformno)) val{repno}(end)]; | |
end | |
end | |
else | |
if val(end-1)=='@' % marker for inserting transformno | |
val=[val(1:end-2) num2str(curr_transforms(transformno)) val(end)]; | |
end | |
end | |
tmp{transformno, layerno}.(fields{fieldno})=val; | |
end | |
end | |
end | |
tmp=tmp(:); | |
net.layer={}; | |
net.layer(1:numel(baselayer))=baselayer; | |
net.layer(end+(1:numel(tmp)))=tmp; | |
net.layer=[net.layer clsnet]; | |
clear tmp | |
% edit vednet deploy file before dumping | |
deploy=make_deploy_net_new(net, input_dim); | |
translayer=repmat(translayer, 1, numel(curr_transforms)); | |
changeList={'name', 'top'}; | |
for transformno=1:length(translayer) | |
for fieldno=1:length(changeList) | |
val=strtrim(translayer{transformno}.(changeList{fieldno})); | |
translayer{transformno}.(changeList{fieldno})=[val(1:end-2) num2str(curr_transforms(transformno)) val(end)]; | |
end | |
translayer{transformno}.bottom=deploy.layer{end}.top; % should be cls_feat | |
end | |
deploy.layer=[deploy.layer translayer]; | |
writePrototxt(make_feat_net_new(net, input_dim), sprintf('../condor/%d_feat.prototxt', clusterno)); | |
writePrototxt(deploy, sprintf('../condor/%d_deploy.prototxt', clusterno)); | |
otherwise | |
error('Unknown config'); | |
end | |
net_fname=solver.net; | |
net_fname(net_fname=='"')=''; | |
writePrototxt(net, net_fname); | |
disp(net_fname); % echo the path of the net definition just written | |
% make a copy of current net definition | |
%copyfile(eval(strrep(solver.net,'"','''')), sprintf('condor/condor_net%d.prototxt',clusterno)); | |
% condor_submit run_learnCNN_%d.condor with solver parameter changed | |
hpcjob.solver=currSolverFile; | |
switch hpcsystem | |
case 'condor' | |
condor_submitFile=sprintf('../condor/condor_learnCNN%d.submit',clusterno); | |
struct2condorsubmit(hpcjob, condor_submitFile); | |
if nosubmit, keyboard; end | |
status=system(sprintf('condor_submit %s 2> /dev/null', condor_submitFile)); | |
assert(status==0); | |
case 'slurm' | |
slurm_submitFile=sprintf('../condor/slurm_learnCNN%d.submit',clusterno); | |
if ~exist('time', 'var') | |
time=20; % in minutes | |
end | |
struct2slurmsubmit(hpcjob, slurm_submitFile, clusterno, clustername, time, 'cmd'); | |
if nosubmit, keyboard; end | |
status=system(sprintf('sbatch %s', slurm_submitFile)); | |
assert(status==0); | |
% updating lastclusterno file | |
origclusterno=origclusterno+1; | |
f=fopen('.lastclusterno', 'w'); | |
fprintf(f, '%d', origclusterno); | |
fclose(f); | |
otherwise | |
error('Unknown hpc system'); | |
end | |
fprintf('Pausing to avoid read-write clashes between jobs when accessing data files\n'); | |
pause(10);% pausing to avoid errors in reading LMDB? | |
end | |
catch err | |
getReport(err) | |
keyboard; | |
end | |
fprintf('\n=============\n'); | |
end | |
function struct2slurmsubmit(object, filename, clusterno, cluster, num_mins, caffe_mode) | |
if nargin<5 | |
num_mins=20; | |
end | |
if nargin<6 | |
caffe_mode='cmd'; | |
end | |
FILE=fopen(filename,'w'); | |
fprintf(FILE, '#!/bin/bash\n'); | |
fprintf(FILE, '#SBATCH -J tyrion\n'); | |
fprintf(FILE, '#SBATCH -o ../condor/%d.err\n', clusterno); | |
%fprintf(FILE, '#SBATCH -o ../condor/%d(%j).err\n', clusterno); | |
fprintf(FILE, '#SBATCH -p gpu\n'); | |
fprintf(FILE, '#SBATCH -n 1\n'); | |
fprintf(FILE, '#SBATCH -A Visual-Recognition\n'); % Visual-Recognition || CS381V-Visual-Recogn || Fine-Tuning-CNNs | |
fprintf(FILE, '#SBATCH -t 00:%02d:00\n', num_mins); | |
%if object.hide.useGPU | |
% switch cluster | |
% case 'stampede' | |
% fprintf(FILE, '#SBATCH -p gpu\n'); | |
% %fprintf(FILE, '#SBATCH -p vis\n'); | |
% case 'maverick' | |
% fprintf(FILE, '#SBATCH -p gpu\n'); | |
% end | |
%else | |
% fprintf(FILE, '#SBATCH -p normal\n'); | |
%end | |
%fprintf(FILE, '#SBATCH -t 00:40:00\n'); | |
%fprintf(FILE, '#SBATCH -t 05:00:00\n'); | |
switch cluster | |
case 'stampede' | |
%fprintf(FILE, 'export LD_LIBRARY_PATH=/work/01932/dineshj/opencv-bleeding/build/lib/:/work/01932/dineshj/boost_1_55_0/stage/lib/:/opt/apps/cuda/6.0/lib64/:/opt/apps/intel13/hdf5/1.8.9/lib:/opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/:/opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64/:/opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/ia32/:/opt/apps/cuda/6.5/lib64/:/work/01932/dineshj/tacc/lib:/work/01932/dineshj/tacc/lib/protobuf/:/opt/apps/intel13/mvapich2/1.9/lib:/opt/apps/intel13/mvapich2/1.9/lib/shared:/opt/apps/intel/13/composer_xe_2013.2.146/tbb/lib/intel64:/opt/intel/mic/coi/host-linux-release/lib:/opt/intel/mic/myo/lib:/opt/apps/intel/13/composer_xe_2013.2.146/mpirt/lib/intel64:/opt/apps/intel/13/composer_xe_2013.2.146/ipp/lib/intel64:/opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64:/work/apps/matlab/2015a/sys/java/jre/glnxa64/jre/lib/amd64/server:/work/apps/matlab/2015a/runtime/glnxa64:/work/apps/matlab/2015a/bin/glnxa64:/opt/apps/cuda/6.0/computeprof/bin:/home/01932/dineshj/tools/libevent-2.0.22/lib\n'); | |
fprintf(FILE, 'time ../caffe2_build_stampede/tools/caffe train -gpu 0'); | |
case 'maverick' | |
%fprintf(FILE, '$PATH \n'); | |
%fprintf(FILE, '$LD_LIBRARY_PATH \n'); | |
%fprintf(FILE, 'export LD_LIBRARY_PATH=/work/01932/dineshj/opencv-bleeding/build/lib/:/work/01932/dineshj/boost_1_55_0/stage/lib/:/opt/apps/intel14/hdf5/1.8.12/x86_64/lib:/opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/:/opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64/:/opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/ia32/:/opt/apps/cuda/6.5/lib64/:/work/01932/dineshj/tacc/lib:/work/01932/dineshj/tacc/lib/protobuf/:/work/01932/dineshj/anaconda2/lib/:/opt/apps/intel14/mvapich2/2.0b/lib:/opt/apps/intel14/mvapich2/2.0b/lib/shared:/opt/apps/intel/13/composer_xe_2013_sp1.1.106/tbb/lib/intel64:/opt/apps/intel/13/composer_xe_2013_sp1.1.106/compiler/lib/intel64:/opt/apps/intel/13/composer_xe_2013_sp1.1.106/mpirt/lib/intel64:/opt/apps/intel/13/composer_xe_2013_sp1.1.106/ipp/lib/intel64:/opt/apps/intel/13/composer_xe_2013_sp1.1.106/mkl/lib/intel64:/work/apps/matlab/2015a/sys/java/jre/glnxa64/jre/lib/amd64/server:/work/apps/matlab/2015a/runtime/glnxa64:/work/apps/matlab/2015a/bin/glnxa64\n'); | |
%fprintf(FILE, 'ldd /work/01932/dineshj/caffe2/python/caffe/_caffe.so\n'); | |
%fprintf(FILE, 'echo ================================================\n'); | |
%fprintf(FILE, 'ldd /work/01932/dineshj/caffe2/python/caffe/_caffe.so | grep "not found"\n'); | |
switch caffe_mode | |
case 'py' | |
fprintf(FILE, 'python -u ./train_clsnet.py'); | |
case 'cmd' | |
fprintf(FILE, 'time ../caffe2/tools/caffe train -gpu 0'); | |
otherwise | |
error('Unknown caffe_mode'); | |
end | |
otherwise | |
error('Unknown cluster name'); | |
end | |
switch caffe_mode | |
case 'py' | |
object2=rmfield(object, 'hide'); | |
args=fieldnames(object2); | |
arg_string=''; | |
for argno=1:numel(args) | |
arg_string=[arg_string, ' --', args{argno}, ' ', object2.(args{argno})]; | |
end | |
fprintf(FILE, '%s', arg_string); % print literally so '%' in argument values is not treated as a format spec | |
case 'cmd' | |
argumentsCompleted=false; | |
if isfield(object,'solver') | |
if ~isempty(object.solver) | |
fprintf(FILE, ' -solver %s', object.solver); | |
%argumentsCompleted=true; | |
end | |
end | |
if ~argumentsCompleted | |
if isfield(object, 'snapshot') | |
if ~isempty(object.snapshot) | |
fprintf(FILE, ' -snapshot %s', object.snapshot); | |
argumentsCompleted=true; | |
end | |
end | |
end | |
if ~argumentsCompleted | |
if isfield(object, 'model') | |
if ~isempty(object.model) && object.hide.finetune_flag | |
fprintf(FILE, ' -weights %s', object.model); | |
argumentsCompleted=true; | |
end | |
end | |
end | |
if ~argumentsCompleted | |
warning('Training from scratch! Could take ages'); | |
end | |
otherwise | |
error('Unknown caffe_mode'); | |
end | |
%fprintf(FILE, '\nQueue %d', object.numjobs); | |
%fprintf(FILE, '\nQueue %d', 1); | |
fclose(FILE); | |
end | |
function struct2condorsubmit(object, filename) | |
FILE=fopen(filename,'w'); | |
fprintf(FILE, '+Group="GRAD"\n'); | |
fprintf(FILE, '+Project="AI_ROBOTICS"\n'); | |
fprintf(FILE, '+ProjectDescription=""\n'); | |
if object.hide.useGPU | |
fprintf(FILE, '+GPUJob=true\n'); | |
fprintf(FILE, 'Requirements=TARGET.GPUSlot\n'); | |
end | |
fprintf(FILE, 'Environment=LD_LIBRARY_PATH=/usr/local/cuda:/usr:/usr/lib/x86_64-linux-gnu:/scratch/vision/dineshj/caffe/cudnn-6.5-linux-R1/;\n'); | |
fprintf(FILE, 'Universe = vanilla\n'); | |
fprintf(FILE, 'Getenv = True\n'); | |
fprintf(FILE, 'Log = ../condor/$(Cluster).log\n'); | |
fprintf(FILE, 'Output = ../condor/$(Cluster).out\n'); | |
fprintf(FILE, 'Error = ../condor/$(Cluster).err\n'); | |
fprintf(FILE, 'Notification = Complete\n'); | |
fprintf(FILE, 'Executable=../caffe2/tools/caffe\n'); | |
fprintf(FILE, 'Arguments= train -gpu 0'); | |
argumentsCompleted=false; | |
if isfield(object,'solver') | |
if ~isempty(object.solver) | |
fprintf(FILE, ' -solver %s', object.solver); | |
%argumentsCompleted=true; | |
end | |
end | |
if ~argumentsCompleted | |
if isfield(object, 'snapshot') | |
if ~isempty(object.snapshot) | |
fprintf(FILE, ' -snapshot %s', object.snapshot); | |
argumentsCompleted=true; | |
end | |
end | |
end | |
if ~argumentsCompleted | |
if isfield(object, 'weights') && object.hide.finetune_flag | |
if ~isempty(object.weights) | |
fprintf(FILE, ' -weights %s', object.weights); | |
argumentsCompleted=true; | |
end | |
end | |
end | |
if ~argumentsCompleted | |
warning('Training from scratch! Could take ages'); | |
end | |
%fprintf(FILE, '\nQueue %d', object.numjobs); | |
fprintf(FILE, '\nQueue %d', 1); | |
fclose(FILE); | |
end | |
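% Sketch of the Arguments line this writes (hypothetical solver path): | |
%   Arguments= train -gpu 0 -solver ../condor/12345_solver.prototxt | |
% '-snapshot' or '-weights' is appended as well when those fields are set, | |
% since supplying only a solver leaves argumentsCompleted false. | |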
function phase=layerphase(x) | |
if isfield(x,'include') | |
phase=x.include.phase; | |
else | |
phase='ALL'; | |
end | |
end | |
function paramNames=refineParamList(List, paramNames) | |
repeat=false; | |
newParamNames={}; | |
for i=1:length(paramNames) | |
tmp = eval(sprintf('List.%s',paramNames{i})); % eval resolves nested field names such as 'solver_param.base_lr' | |
if isstruct(tmp) | |
repeat=true; | |
moreParamNames=strcat(paramNames{i}, '.', fieldnames(tmp)); | |
newParamNames(end+1:end+length(moreParamNames))=moreParamNames; | |
else | |
newParamNames{end+1}=paramNames{i}; | |
end | |
end | |
paramNames=newParamNames; | |
if repeat | |
paramNames=refineParamList(List, paramNames);% recursive call | |
end | |
end |
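% Example (hypothetical field names): for List with scalar field 'base_lr' | |
% and struct field 'net_param' containing 'source', this returns | |
% {'base_lr', 'net_param.source'} after recursing once to expand the struct. | |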
import sys | |
import caffe | |
from caffe import layers as L | |
from caffe import params as P | |
import tempfile | |
from IPython.core.debugger import Tracer; debug_here = Tracer() | |
#weight_param = dict(lr_mult=1, decay_mult=1) | |
#bias_param = dict(lr_mult=2, decay_mult=0) | |
#learned_param = [weight_param, bias_param] | |
def learned_param(name="", n_param=2): | |
param=[] | |
for i in range(n_param): | |
if name: | |
param.append(dict( | |
name = name+"_w" + str(i+1), | |
lr_mult = 1 if i==0 else 2, | |
decay_mult = 0) | |
#decay_mult = 1 if i==0 else 0) | |
) | |
else: | |
param.append(dict( | |
lr_mult = 1 if i==0 else 2, | |
decay_mult = 0) | |
#decay_mult = 1 if i==0 else 0) | |
) | |
return param | |
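# e.g. learned_param("conv1") returns | |
# [dict(name="conv1_w1", lr_mult=1, decay_mult=0), | |
#  dict(name="conv1_w2", lr_mult=2, decay_mult=0)] | |
# i.e. the usual weight/bias pair, with weight decay disabled here. | |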
def decay_param(name="", n_param=2): | |
param=[] | |
for i in range(n_param): | |
if name: | |
param.append(dict( | |
name = name+"_w" + str(i+1), | |
lr_mult = 1 if i==0 else 2, | |
decay_mult = 1 if i==0 else 0) | |
) | |
else: | |
param.append(dict( | |
lr_mult = 1 if i==0 else 2, | |
decay_mult = 1 if i==0 else 0) | |
) | |
return param | |
def frozen_param(name="", n_param=2): | |
param=[] | |
for i in range(n_param): | |
if name: | |
param.append(dict( | |
name = name +"_w" + str(i+1), | |
lr_mult = 0, | |
decay_mult = 0)); | |
else: | |
param.append(dict( | |
lr_mult = 0, | |
decay_mult = 0)) | |
return param | |
# returns one layer stack based on the CaffeNet architecture | |
def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1, | |
param=learned_param(), | |
weight_filler=dict(type='gaussian', std=0.01), | |
bias_filler=dict(type='constant', value=0), | |
names=["",""]): | |
if not names[0]: | |
conv = L.Convolution(bottom, kernel_size=ks, stride=stride, | |
num_output=nout, pad=pad, group=group, | |
param=param, weight_filler=weight_filler, | |
bias_filler=bias_filler) | |
relu = L.ReLU(conv, in_place=True) | |
else: | |
conv = L.Convolution(bottom, kernel_size=ks, stride=stride, | |
num_output=nout, pad=pad, group=group, | |
param=param, weight_filler=weight_filler, | |
bias_filler=bias_filler, name=names[0]) | |
relu = L.ReLU(conv, in_place=True, name=names[1]) | |
return conv,relu | |
def fc_relu(bottom, nout, | |
param=learned_param(), | |
weight_filler=dict(type='gaussian', std=0.005), | |
bias_filler=dict(type='constant', value=0), | |
names=["",""]): | |
if not names[0]: | |
fc = L.InnerProduct(bottom, num_output=nout, param=param, | |
weight_filler=weight_filler, | |
bias_filler=bias_filler) | |
relu = L.ReLU(fc, in_place=True) | |
else: | |
fc = L.InnerProduct(bottom, num_output=nout, param=param, | |
weight_filler=weight_filler, | |
bias_filler=bias_filler, name=names[0]) | |
relu = L.ReLU(fc, in_place=True, name=names[1]) | |
return fc,relu | |
def max_pool(bottom, ks, stride=1, name=""): | |
if not name: | |
pool= L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) | |
else: | |
pool=L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride, name=name) | |
return pool | |
def generate_conv1_to_bn6( | |
ns, #NetSpec | |
bottom_blob="", learn_all=False, | |
perLayerBatchNormFlag=True, num_dropouts=1, | |
in_place_pool5=True, | |
blob_prefix="", blob_suffix="", | |
layer_prefix="", layer_suffix="", | |
param_prefix="", param_suffix="", | |
top_blob=""): | |
# n = netspec | |
def str2blobname(string): | |
return blob_prefix+string+blob_suffix | |
def str2layername(string): | |
return layer_prefix+string+layer_suffix | |
def str2paramname(string): | |
return param_prefix+string+param_suffix | |
parfoo=[]; | |
parfoo.append(learned_param if learn_all else frozen_param) # for other layers | |
parfoo.append(decay_param if learn_all else frozen_param) # for fc layers | |
if not bottom_blob: | |
ns[str2blobname("data")], ns[str2blobname("labelvec")]=L.Data( | |
name=str2layername("data"), | |
source="./SUN/pulkit_lmdbs/sun_imSz227_ntpc5_run1_train-lmdb", | |
transform_param=dict( | |
mean_value=[104,117,123]), | |
batch_size=5, | |
ntop=2, | |
backend=1); | |
bottom_blob=str2blobname("data") | |
#debug_here() | |
ns[str2blobname("conv1")], ns[str2blobname("relu1")]=conv_relu( | |
ns[bottom_blob], 11, 96, stride=4, | |
param=parfoo[0](name=str2paramname("conv1")), | |
names=[str2layername("conv1"), str2layername("relu1")] | |
) | |
#debug_here() | |
#print(str(ns.to_proto())) | |
ns[str2blobname("pool1")]= max_pool( | |
ns[str2blobname("relu1")], 3, stride=2, | |
name=str2layername("pool1") | |
) | |
if perLayerBatchNormFlag: | |
ns[str2blobname("bn1")]= L.BatchNorm( | |
ns[str2blobname("pool1")], in_place=True, | |
name=str2layername("bn1"), | |
param=frozen_param(name=str2paramname("bn1"), | |
n_param=3)) | |
last_top=ns[str2blobname("bn1")]; | |
else: | |
last_top=ns[str2blobname("pool1")]; | |
ns[str2blobname("norm1")]= L.LRN(last_top, | |
local_size=5, | |
alpha=1e-4, beta=0.75, name=str2layername("norm1")) | |
ns[str2blobname("conv2")], ns[str2blobname("relu2")]=conv_relu( | |
ns[str2blobname("norm1")], 5, 256, pad=2, group=2, | |
param=parfoo[0](name=str2paramname("conv2")), | |
names=[str2layername("conv2"), str2layername("relu2")] | |
) | |
ns[str2blobname("pool2")]= max_pool( | |
ns[str2blobname("relu2")], 3, stride=2, | |
name=str2layername("pool2") | |
) | |
ns[str2blobname("norm2")]= L.LRN(ns[str2blobname("pool2")], | |
local_size=5, | |
alpha=1e-4, beta=0.75, name=str2layername("norm2")) | |
if perLayerBatchNormFlag: | |
ns[str2blobname("bn2")]= L.BatchNorm( | |
ns[str2blobname("norm2")], in_place=True, | |
name=str2layername("bn2"), | |
param=frozen_param( | |
name=str2paramname("bn2"), | |
n_param=3)); | |
last_top=ns[str2blobname("bn2")]; | |
else: | |
last_top=ns[str2blobname("norm2")]; | |
ns[str2blobname("conv3")], ns[str2blobname("relu3")]= conv_relu( | |
last_top, 3, 384, pad=1, | |
param=parfoo[0](name=str2paramname("conv3")), | |
names=[str2layername("conv3"), str2layername("relu3")] | |
) | |
if perLayerBatchNormFlag: | |
ns[str2blobname("bn3")]= L.BatchNorm( | |
ns[str2blobname("relu3")], in_place=True, | |
name=str2layername("bn3"), | |
param=frozen_param( | |
name=str2paramname("bn3"), | |
n_param=3)) | |
last_top=ns[str2blobname("bn3")]; | |
else: | |
last_top=ns[str2blobname("relu3")]; | |
ns[str2blobname("conv4")], ns[str2blobname("relu4")]= conv_relu( | |
last_top, 3, 384, pad=1, group=2, | |
param=parfoo[0](name=str2paramname("conv4")), | |
names=[str2layername("conv4"), str2layername("relu4")] | |
) | |
if perLayerBatchNormFlag: | |
ns[str2blobname("bn4")]= L.BatchNorm( | |
ns[str2blobname("relu4")], in_place=True, | |
name=str2layername("bn4"), | |
param=frozen_param( | |
name=str2paramname("bn4"), | |
n_param=3)) | |
last_top=ns[str2blobname("bn4")]; | |
else: | |
last_top=ns[str2blobname("relu4")]; | |
ns[str2blobname("conv5")], ns[str2blobname("relu5")]= conv_relu( | |
last_top, 3, 256, pad=1, group=2, | |
param=parfoo[0](name=str2paramname("conv5")), | |
names=[str2layername("conv5"), str2layername("relu5")] | |
) | |
ns[str2blobname("pool5")]= max_pool( | |
ns[str2blobname("relu5")], 3, stride=2, | |
name=str2layername("pool5")) | |
if perLayerBatchNormFlag: | |
ns[str2blobname("bn5")]= L.BatchNorm( | |
ns[str2blobname("pool5")], in_place=True, | |
name=str2layername("bn5"), | |
param=frozen_param( | |
name=str2paramname("bn5"), | |
n_param=3)); | |
last_top=ns[str2blobname("bn5")]; | |
last_top_root="bn5" | |
else: | |
last_top=ns[str2blobname("pool5")]; | |
last_top_root="pool5" | |
pool5_top=str2blobname("drop_pool5" + ("" if in_place_pool5 else "_s") ); | |
if num_dropouts>=1: | |
ns[pool5_top]= L.Dropout( | |
last_top, in_place=in_place_pool5, | |
dropout_param=dict(dropout_ratio=0.5), | |
name=str2layername("drop_pool5")) | |
else: | |
ns[pool5_top]= L.Power( | |
last_top, in_place=in_place_pool5, | |
power_param=dict(scale=0.5), | |
name=str2layername("drop_pool5")) | |
ns[str2blobname("fc6")], ns[str2blobname("relu6")]= fc_relu( | |
ns[pool5_top], 4096, | |
param=parfoo[1](name=str2paramname("fc6")), | |
names=[str2layername("fc6"), str2layername("relu6")]) | |
# last layer batch norm... applied independent of perLayerBatchNormFlag | |
if not top_blob: | |
top_blob=str2blobname("bn6"); | |
ns[top_blob]= L.BatchNorm( | |
ns[str2blobname("relu6")], in_place=True, | |
name=str2layername("bn6"), | |
param=frozen_param( | |
name=str2paramname("bn6"), | |
n_param=3)) | |
return ns | |
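# Minimal usage sketch (hypothetical output path; the data source defaults to | |
# the SUN lmdb hard-coded above when no bottom_blob is given): | |
#   ns = caffe.NetSpec() | |
#   ns = generate_conv1_to_bn6(ns, learn_all=True) | |
#   open('trainnet.prototxt', 'w').write(str(ns.to_proto())) | |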
def generate_classifier( | |
ns, #NetSpec | |
bottom_blob=[], learn_all=False, | |
propagate_down=True, | |
num_dropouts=0, | |
blob_prefix="", blob_suffix="", | |
layer_prefix="", layer_suffix="", | |
param_prefix="", param_suffix="", | |
loss_name="", | |
acc_name="", | |
num_cls=397, | |
loss_weight=1 | |
): | |
# n = netspec | |
def str2blobname(string): | |
return blob_prefix+string+blob_suffix | |
def str2layername(string): | |
return layer_prefix+string+layer_suffix | |
def str2paramname(string): | |
return param_prefix+string+param_suffix | |
parfoo = decay_param if learn_all else frozen_param | |
if not bottom_blob: | |
ns[str2blobname("data")], ns[str2blobname("labelvec")]=L.Data( | |
name=str2layername("data"), | |
source="./SUN/pulkit_lmdbs/sun_imSz227_ntpc5_run1_train-lmdb", | |
transform_param=dict( | |
mean_value=[104,117,123]), | |
batch_size=5, | |
ntop=2, | |
backend=1); | |
bottom_blob=[str2blobname("data"), str2blobname("labelvec")]; | |
if num_dropouts>=1: | |
ns[str2blobname("drop"+bottom_blob[0])]=L.Dropout( | |
ns[bottom_blob[0]], in_place=True, | |
dropout_param=dict(dropout_ratio=0.5), | |
name=str2layername("drop_"+bottom_blob[0]), | |
); | |
last_top=str2blobname("drop"+bottom_blob[0]); | |
else: | |
last_top=bottom_blob[0]; | |
#ns[str2blobname("drop"+bottom_blob[0])]=L.Power( | |
# ns[bottom_blob[0]], in_place=True, | |
# power_param=dict(scale=0.5), | |
# name=str2layername("drop_"+bottom_blob[0]), | |
# propagate_down=propagate_down); | |
ns[str2blobname("prefinal")]=L.InnerProduct( | |
ns[last_top], | |
param=parfoo(), | |
inner_product_param=dict( | |
num_output=500, | |
weight_filler= dict(type="gaussian", std=0.005), | |
bias_filler=dict(type="constant", value=1) | |
), | |
propagate_down=propagate_down, | |
name=str2layername("prefinal") | |
) | |
ns[str2blobname("prefinal_relu")]=L.ReLU( | |
ns[str2blobname("prefinal")], | |
in_place=True, | |
name=str2layername("prefinal_relu") | |
) | |
ns[str2blobname("final")]=L.InnerProduct( | |
ns[str2blobname("prefinal_relu")], | |
param=parfoo(), | |
inner_product_param=dict( | |
num_output=num_cls, | |
weight_filler= dict(type="gaussian", std=0.005), | |
bias_filler=dict(type="constant", value=1) | |
), | |
name=str2layername("final") | |
) | |
ns[loss_name]=L.SoftmaxWithLoss( | |
ns[str2blobname("final")], | |
ns[bottom_blob[1]], | |
name=loss_name, | |
loss_weight=loss_weight | |
) | |
ns[acc_name]=L.Accuracy( | |
ns[str2blobname("final")], | |
ns[bottom_blob[1]], | |
name=acc_name | |
) | |
return ns | |
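# e.g. generate_classifier(ns, bottom_blob=["bn6", "labelvec"], | |
#                          loss_name="cls_loss", acc_name="cls_acc") | |
# bottom_blob[0] is the feature blob, bottom_blob[1] the class label blob; | |
# loss_name/acc_name here are hypothetical but must be supplied non-empty. | |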
def generate_contrastive_loss( | |
ns, #NetSpec | |
bottom_blob=[], learn_all=False, | |
blob_prefix="", blob_suffix="", | |
layer_prefix="", layer_suffix="", | |
param_prefix="", param_suffix="", | |
loss_margin=1, | |
loss_weight=1, | |
): | |
# n = netspec | |
def str2blobname(string): | |
return blob_prefix+string+blob_suffix | |
def str2layername(string): | |
return layer_prefix+string+layer_suffix | |
def str2paramname(string): | |
return param_prefix+string+param_suffix | |
parfoo = learned_param if learn_all else frozen_param | |
ns[str2blobname("loss")]=L.ContrastiveLoss( | |
ns[bottom_blob[0]], | |
ns[bottom_blob[1]], | |
ns[bottom_blob[2]], | |
name=str2layername("loss"), | |
loss_weight=loss_weight, | |
contrastive_loss_param=dict( | |
margin=loss_margin | |
) | |
) | |
ns[str2blobname("dist")]=L.EuclideanDist( | |
ns[bottom_blob[0]], | |
ns[bottom_blob[1]], | |
name=str2layername("dist"), | |
loss_weight=0 | |
) | |
ns[str2blobname("AP")], ns[str2blobname("AUROC")]=L.AveragePrec( | |
ns[str2blobname("dist")], | |
ns[bottom_blob[2]], | |
name=str2layername("AP"), | |
loss_weight=[0,0], | |
ntop=2 | |
) | |
return ns | |
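# bottom_blob is [feat_a, feat_b, sim_label]: the two feature blobs being | |
# compared and the binary similarity label; the EuclideanDist and AveragePrec | |
# tops are monitoring-only (loss_weight 0). | |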
def generate_equivariant_map( | |
ns, #NetSpec | |
bottom_blob="", learn_all=False, | |
blob_prefix="", blob_suffix="", | |
layer_prefix="", layer_suffix="", | |
param_prefix="", param_suffix="", | |
bottleneck_size=128, | |
orig_dim=4096, | |
top_blob="", | |
nonDiscrete_flag=False, | |
motion_blob="", | |
): | |
# n = netspec | |
def str2blobname(string): | |
return blob_prefix+string+blob_suffix | |
def str2layername(string): | |
return layer_prefix+string+layer_suffix | |
def str2paramname(string): | |
return param_prefix+string+param_suffix | |
parfoo = decay_param if learn_all else frozen_param | |
ns[str2blobname("map1")], ns[str2blobname("map2")]=fc_relu( | |
ns[bottom_blob], bottleneck_size, | |
param=parfoo(), | |
weight_filler=dict(type="xavier"), | |
bias_filler=dict(type="constant") | |
) | |
if nonDiscrete_flag: | |
ns[str2blobname("mot-map1")], ns[str2blobname("mot-map2")]=fc_relu( | |
ns[motion_blob], bottleneck_size, | |
param=parfoo(), | |
weight_filler=dict(type="xavier"), | |
bias_filler=dict(type="constant") | |
) | |
ns[str2blobname("map2-motion")]=L.Concat( | |
ns[str2blobname("map2")], | |
ns[str2blobname("mot-map2")]); | |
next_bottom=str2blobname("map2-motion"); | |
else: | |
next_bottom=str2blobname("map2"); | |
ns[top_blob]=L.InnerProduct( | |
ns[next_bottom], | |
param=parfoo(), | |
inner_product_param=dict( | |
num_output=orig_dim, | |
weight_filler= dict(type="xavier"), | |
bias_filler=dict(type="constant"), | |
), | |
name=top_blob | |
) | |
return ns | |
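# Note: callers must pass a non-empty top_blob here; unlike the other | |
# generators, this one has no fallback name for its final InnerProduct top. | |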
class CaffeSolver: | |
""" | |
CaffeSolver is a class for creating a solver.prototxt file. It sets default | |
values and can export a solver parameter file. | |
Note that all parameters are stored as strings. Parameters that are strings | |
in the prototxt itself (e.g. snapshot_prefix) must carry their own quotes, | |
i.e. a string within a string. | |
""" | |
def __init__(self, testnet_prototxt_path="testnet.prototxt", | |
trainnet_prototxt_path="trainnet.prototxt", debug=False): | |
self.sp = {} | |
# critical: | |
self.sp['base_lr'] = '0.0001' | |
self.sp['momentum'] = '0.9' | |
self.sp['momentum2'] = '0.999' | |
# speed: | |
self.sp['test_iter'] = '10' | |
self.sp['test_interval'] = '500' | |
# looks: | |
self.sp['display'] = '20' | |
self.sp['snapshot'] = '10000' | |
self.sp['snapshot_prefix'] = '"../caffe_snapshots/default_snapshot"' # string within a string! | |
# learning rate policy | |
self.sp['lr_policy'] = '"step"' | |
self.sp['stepsize'] = '5000' | |
# solver algorithm | |
self.sp['type'] = '"Adam"' | |
# important, but rare: | |
self.sp['gamma'] = '0.5' | |
self.sp['weight_decay'] = '0.0005' | |
#self.sp['train_net'] = '"' + trainnet_prototxt_path + '"' | |
#self.sp['test_net'] = '"' + testnet_prototxt_path + '"' | |
# pretty much never change these. | |
self.sp['max_iter'] = '30000' | |
#self.sp['test_initialization'] = 'true' | |
#self.sp['test_compute_loss'] = 'true' | |
#self.sp['average_loss'] = '25' # this has to do with the display. | |
#self.sp['iter_size'] = '1' # this is for accumulating gradients | |
self.sp['solver_mode'] = 'GPU' | |
if (debug): | |
self.sp['max_iter'] = '12' | |
self.sp['test_iter'] = '1' | |
self.sp['test_interval'] = '4' | |
self.sp['display'] = '1' | |
self.sp['solver_mode']='CPU' | |
def add_from_file(self, filepath): | |
""" | |
Reads a caffe solver prototxt file and updates the CaffeSolver | |
instance parameters. | |
""" | |
with open(filepath, 'r') as f: | |
for line in f: | |
if not line.strip() or line.startswith('#'): # skip blank lines and comments | |
continue | |
splitLine = line.split(':', 1) # split on the first ':' only, so values containing ':' survive | |
self.sp[splitLine[0].strip()] = splitLine[1].strip() | |
def write(self, filepath): | |
""" | |
Export the solver parameters to the file at "filepath", sorted alphabetically. | |
""" | |
with open(filepath, 'w') as f: # ensures the file is closed after writing | |
for key, value in sorted(self.sp.items()): | |
if not isinstance(value, str): | |
raise TypeError('All solver parameters must be strings') | |
f.write('%s: %s\n' % (key, value)) | |
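# Minimal usage sketch (hypothetical paths): | |
#   sv = CaffeSolver(debug=False) | |
#   sv.sp['base_lr'] = '0.001' | |
#   sv.sp['net'] = '"../condor/12345.prototxt"' # string within a string | |
#   sv.write('../condor/12345_solver.prototxt') | |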