twmht · October 31, 2017 15:26
diff --git a/caffe.proto b/caffe.proto
 syntax = "proto2";

 package caffe;

 // Specifies the shape (dimensions) of a Blob.
 message BlobShape {
  repeated int64 dim = 1 [packed = true];
 }

 message BlobProto {
  optional BlobShape shape = 7;
  repeated float data = 5 [packed = true];
  repeated float diff = 6 [packed = true];
  repeated double double_data = 8 [packed = true];
  repeated double double_diff = 9 [packed = true];

  // 4D dimensions -- deprecated.  Use "shape" instead.
  optional int32 num = 1 [default = 0];
  optional int32 channels = 2 [default = 0];
  optional int32 height = 3 [default = 0];
  optional int32 width = 4 [default = 0];
 }

 // The BlobProtoVector is simply a way to pass multiple blobproto instances
 // around.
 message BlobProtoVector {
  repeated BlobProto blobs = 1;
 }

 message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;
  // the actual image data, in bytes
  optional bytes data = 4;
  optional int32 label = 5;
  // Optionally, the datum could also hold float data.
  repeated float float_data = 6;
  // If true data contains an encoded image that need to be decoded
  optional bool encoded = 7 [default = false];
 }

 message FillerParameter {
  // The filler type.
  optional string type = 1 [default = 'constant'];
  optional float value = 2 [default = 0]; // the value in constant filler
  optional float min = 3 [default = 0]; // the min value in uniform filler
  optional float max = 4 [default = 1]; // the max value in uniform filler
  optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
  optional float std = 6 [default = 1]; // the std value in Gaussian filler
  // The expected number of non-zero output weights for a given input in
  // Gaussian filler -- the default -1 means don't perform sparsification.
  optional int32 sparse = 7 [default = -1];
  // Normalize the filler variance by fan_in, fan_out, or their average.
  // Applies to 'xavier' and 'msra' fillers.
  enum VarianceNorm {
    FAN_IN = 0;
    FAN_OUT = 1;
    AVERAGE = 2;
  }
  optional VarianceNorm variance_norm = 8 [default = FAN_IN];
 }

 message NetParameter {
  optional string name = 1; // consider giving the network a name
  // DEPRECATED. See InputParameter. The input blobs to the network.
  repeated string input = 3;
  // DEPRECATED. See InputParameter. The shape of the input blobs.
  repeated BlobShape input_shape = 8;

  // 4D input dimensions -- deprecated.  Use "input_shape" instead.
  // If specified, for each input blob there should be four
  // values specifying the num, channels, height and width of the input blob.
  // Thus, there should be a total of (4 * #input) numbers.
  repeated int32 input_dim = 4;

  // Whether the network will force every layer to carry out backward operation.
  // If set False, then whether to carry out backward is determined
  // automatically according to the net structure and learning rates.
  optional bool force_backward = 5 [default = false];
  // The current "state" of the network, including the phase, level, and stage.
  // Some layers may be included/excluded depending on this state and the states
  // specified in the layers' include and exclude fields.
  optional NetState state = 6;

  // Print debugging information about results while running Net::Forward,
  // Net::Backward, and Net::Update.
  optional bool debug_info = 7 [default = false];

  // The layers that make up the net.  Each of their configurations, including
  // connectivity and behavior, is specified as a LayerParameter.
  repeated LayerParameter layer = 100;  // ID 100 so layers are printed last.

  // DEPRECATED: use 'layer' instead.
  repeated V1LayerParameter layers = 2;
 }

 // NOTE
 // Update the next available ID when you add a new SolverParameter field.
 //
 // SolverParameter next available ID: 42 (last added: layer_wise_reduce)
 message SolverParameter {
  //////////////////////////////////////////////////////////////////////////////
  // Specifying the train and test networks
  //
  // Exactly one train net must be specified using one of the following fields:
  //     train_net_param, train_net, net_param, net
  // One or more test nets may be specified using any of the following fields:
  //     test_net_param, test_net, net_param, net
  // If more than one test net field is specified (e.g., both net and
  // test_net are specified), they will be evaluated in the field order given
  // above: (1) test_net_param, (2) test_net, (3) net_param/net.
  // A test_iter must be specified for each test_net.
  // A test_level and/or a test_stage may also be specified for each test_net.
  //////////////////////////////////////////////////////////////////////////////

  // Proto filename for the train net, possibly combined with one or more
  // test nets.
  optional string net = 24;
  // Inline train net param, possibly combined with one or more test nets.
  optional NetParameter net_param = 25;

  optional string train_net = 1; // Proto filename for the train net.
  repeated string test_net = 2; // Proto filenames for the test nets.
  optional NetParameter train_net_param = 21; // Inline train net params.
  repeated NetParameter test_net_param = 22; // Inline test net params.

  // The states for the train/test nets. Must be unspecified or
  // specified once per net.
  //
  // By default, train_state will have phase = TRAIN,
  // and all test_state's will have phase = TEST.
  // Other defaults are set according to the NetState defaults.
  optional NetState train_state = 26;
  repeated NetState test_state = 27;

  // The number of iterations for each test net.
  repeated int32 test_iter = 3;

  // The number of iterations between two testing phases.
  optional int32 test_interval = 4 [default = 0];
  optional bool test_compute_loss = 19 [default = false];
  // If true, run an initial test pass before the first iteration,
  // ensuring memory availability and printing the starting value of the loss.
  optional bool test_initialization = 32 [default = true];
  optional float base_lr = 5; // The base learning rate
  // the number of iterations between displaying info. If display = 0, no info
  // will be displayed.
  optional int32 display = 6;
  // Display the loss averaged over the last average_loss iterations
  optional int32 average_loss = 33 [default = 1];
  optional int32 max_iter = 7; // the maximum number of iterations
  // accumulate gradients over `iter_size` x `batch_size` instances
  optional int32 iter_size = 36 [default = 1];

  // The learning rate decay policy. The currently implemented learning rate
  // policies are as follows:
  //    - fixed: always return base_lr.
  //    - step: return base_lr * gamma ^ (floor(iter / step))
  //    - exp: return base_lr * gamma ^ iter
  //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
  //    - multistep: similar to step but it allows non uniform steps defined by
  //      stepvalue
  //    - poly: the effective learning rate follows a polynomial decay, to be
  //      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
  //    - sigmoid: the effective learning rate follows a sigmod decay
  //      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
  //
  // where base_lr, max_iter, gamma, step, stepvalue and power are defined
  // in the solver parameter protocol buffer, and iter is the current iteration.
  optional string lr_policy = 8;
  optional float gamma = 9; // The parameter to compute the learning rate.
  optional float power = 10; // The parameter to compute the learning rate.
  optional float momentum = 11; // The momentum value.
  optional float weight_decay = 12; // The weight decay.
  // regularization types supported: L1 and L2
  // controlled by weight_decay
  optional string regularization_type = 29 [default = "L2"];
  // the stepsize for learning rate policy "step"
  optional int32 stepsize = 13;
  // the stepsize for learning rate policy "multistep"
  repeated int32 stepvalue = 34;

  // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
  // whenever their actual L2 norm is larger.
  optional float clip_gradients = 35 [default = -1];

  optional int32 snapshot = 14 [default = 0]; // The snapshot interval
  optional string snapshot_prefix = 15; // The prefix for the snapshot.
  // whether to snapshot diff in the results or not. Snapshotting diff will help
  // debugging but the final protocol buffer size will be much larger.
  optional bool snapshot_diff = 16 [default = false];
  enum SnapshotFormat {
    HDF5 = 0;
    BINARYPROTO = 1;
  }
  optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
  // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default.
  enum SolverMode {
    CPU = 0;
    GPU = 1;
  }
  optional SolverMode solver_mode = 17 [default = GPU];
  // the device_id will that be used in GPU mode. Use device_id = 0 in default.
  optional int32 device_id = 18 [default = 0];
  // If non-negative, the seed with which the Solver will initialize the Caffe
  // random number generator -- useful for reproducible results. Otherwise,
  // (and by default) initialize using a seed derived from the system clock.
  optional int64 random_seed = 20 [default = -1];

  // type of the solver
  optional string type = 40 [default = "SGD"];

  // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam
  optional float delta = 31 [default = 1e-8];
  // parameters for the Adam solver
  optional float momentum2 = 39 [default = 0.999];

  // RMSProp decay value
  // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
  optional float rms_decay = 38 [default = 0.99];

  // If true, print information about the state of the net that may help with
  // debugging learning problems.
  optional bool debug_info = 23 [default = false];

  // If false, don't save a snapshot after training finishes.
  optional bool snapshot_after_train = 28 [default = true];

  // DEPRECATED: old solver enum types, use string instead
  enum SolverType {
    SGD = 0;
    NESTEROV = 1;
    ADAGRAD = 2;
    RMSPROP = 3;
    ADADELTA = 4;
    ADAM = 5;
  }
  // DEPRECATED: use type instead of solver_type
  optional SolverType solver_type = 30 [default = SGD];

  // Overlap compute and communication for data parallel training
  optional bool layer_wise_reduce = 41 [default = true];

  optional int32 upload_iters = 42;
  optional string upload_hostname = 43;
  optional string upload_port = 44;
  optional string exp_name = 45;
 }

 // A message that stores the solver snapshots
 message SolverState {
  optional int32 iter = 1; // The current iteration
  optional string learned_net = 2; // The file that stores the learned net.
  repeated BlobProto history = 3; // The history for sgd solvers
  optional int32 current_step = 4 [default = 0]; // The current step for learning rate
 }

 enum Phase {
   TRAIN = 0;
   TEST = 1;
 }

 message NetState {
  optional Phase phase = 1 [default = TEST];
  optional int32 level = 2 [default = 0];
  repeated string stage = 3;
 }

 message NetStateRule {
  // Set phase to require the NetState have a particular phase (TRAIN or TEST)
  // to meet this rule.
  optional Phase phase = 1;

  // Set the minimum and/or maximum levels in which the layer should be used.
  // Leave undefined to meet the rule regardless of level.
  optional int32 min_level = 2;
  optional int32 max_level = 3;

  // Customizable sets of stages to include or exclude.
  // The net must have ALL of the specified stages and NONE of the specified
  // "not_stage"s to meet the rule.
  // (Use multiple NetStateRules to specify conjunctions of stages.)
  repeated string stage = 4;
  repeated string not_stage = 5;
 }

 // Specifies training parameters (multipliers on global learning constants,
 // and the name and other settings used for weight sharing).
 message ParamSpec {
  // The names of the parameter blobs -- useful for sharing parameters among
  // layers, but never required otherwise.  To share a parameter between two
  // layers, give it a (non-empty) name.
  optional string name = 1;

  // Whether to require shared weights to have the same shape, or just the same
  // count -- defaults to STRICT if unspecified.
  optional DimCheckMode share_mode = 2;
  enum DimCheckMode {
    // STRICT (default) requires that num, channels, height, width each match.
    STRICT = 0;
    // PERMISSIVE requires only the count (num*channels*height*width) to match.
    PERMISSIVE = 1;
  }

  // The multiplier on the global learning rate for this parameter.
  optional float lr_mult = 3 [default = 1.0];

  // The multiplier on the global weight decay for this parameter.
  optional float decay_mult = 4 [default = 1.0];
 }

 // NOTE
 // Update the next available ID when you add a new LayerParameter field.
 //
 // LayerParameter next available layer-specific ID: 147 (last added: recurrent_param)
 message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob

  // The train / test phase for computation.
  optional Phase phase = 10;

  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1,
  // to each top blob.
  repeated float loss_weight = 5;

  // Specifies training parameters (multipliers on global learning constants,
  // and the name and other settings used for weight sharing).
  repeated ParamSpec param = 6;

  // The blobs containing the numeric parameters of the layer.
  repeated BlobProto blobs = 7;

  // Specifies whether to backpropagate to each bottom. If unspecified,
  // Caffe will automatically infer whether each input needs backpropagation
  // to compute parameter gradients. If set to true for some inputs,
  // backpropagation to those inputs is forced; if set false for some inputs,
  // backpropagation to those inputs is skipped.
  //
  // The size must be either 0 or equal to the number of bottoms.
  repeated bool propagate_down = 11;

  // Rules controlling whether and when a layer is included in the network,
  // based on the current NetState.  You may specify a non-zero number of rules
  // to include OR exclude, but not both.  If no include or exclude rules are
  // specified, the layer is always included.  If the current NetState meets
  // ANY (i.e., one or more) of the specified rules, the layer is
  // included/excluded.
  repeated NetStateRule include = 8;
  repeated NetStateRule exclude = 9;

  // Parameters for data pre-processing.
  optional TransformationParameter transform_param = 100;

  // Parameters shared by loss layers.
  optional LossParameter loss_param = 101;

  // Layer type-specific parameters.
  //
  // Note: certain layers may have more than one computational engine
  // for their implementation. These layers include an Engine type and
  // engine parameter for selecting the implementation.
  // The default for the engine is set by the ENGINE switch at compile-time.
  optional AccuracyParameter accuracy_param = 102;
  optional ArgMaxParameter argmax_param = 103;
  optional BatchNormParameter batch_norm_param = 139;
  optional BiasParameter bias_param = 141;
  optional ConcatParameter concat_param = 104;
  optional ContrastiveLossParameter contrastive_loss_param = 105;
  optional ConvolutionParameter convolution_param = 106;
  optional CropParameter crop_param = 144;
  optional DataParameter data_param = 107;
  optional DropoutParameter dropout_param = 108;
  optional DummyDataParameter dummy_data_param = 109;
  optional EltwiseParameter eltwise_param = 110;
  optional ELUParameter elu_param = 140;
  optional EmbedParameter embed_param = 137;
  optional ExpParameter exp_param = 111;
  optional FlattenParameter flatten_param = 135;
  optional HDF5DataParameter hdf5_data_param = 112;
  optional HDF5OutputParameter hdf5_output_param = 113;
  optional HingeLossParameter hinge_loss_param = 114;
  optional ImageDataParameter image_data_param = 115;
  optional InfogainLossParameter infogain_loss_param = 116;
  optional InnerProductParameter inner_product_param = 117;
  optional InputParameter input_param = 143;
  optional LogParameter log_param = 134;
  optional LRNParameter lrn_param = 118;
  optional MemoryDataParameter memory_data_param = 119;
  optional MVNParameter mvn_param = 120;
  optional ParameterParameter parameter_param = 145;
  optional PoolingParameter pooling_param = 121;
  optional PowerParameter power_param = 122;
  optional PReLUParameter prelu_param = 131;
  optional PythonParameter python_param = 130;
  optional RecurrentParameter recurrent_param = 146;
  optional ReductionParameter reduction_param = 136;
  optional ReLUParameter relu_param = 123;
  optional ReshapeParameter reshape_param = 133;
  optional ScaleParameter scale_param = 142;
  optional SigmoidParameter sigmoid_param = 124;
  optional SoftmaxParameter softmax_param = 125;
  optional SPPParameter spp_param = 132;
  optional SliceParameter slice_param = 126;
  optional TanHParameter tanh_param = 127;
  optional ThresholdParameter threshold_param = 128;
  optional TileParameter tile_param = 138;
  optional WindowDataParameter window_data_param = 129;
 }

 // Message that stores parameters used to apply transformation
 // to the data layer's data
 message TransformationParameter {
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 1 [default = 1];
  // Specify if we want to randomly mirror data.
  optional bool mirror = 2 [default = false];
  // Specify if we would like to randomly crop an image.
  optional uint32 crop_size = 3 [default = 0];
  // mean_file and mean_value cannot be specified at the same time
  optional string mean_file = 4;
  // if specified can be repeated once (would subtract it from all the channels)
  // or can be repeated the same number of times as channels
  // (would subtract them from the corresponding channel)
  repeated float mean_value = 5;
  // Force the decoded image to have 3 color channels.
  optional bool force_color = 6 [default = false];
  // Force the decoded image to have 1 color channels.
  optional bool force_gray = 7 [default = false];
 }

 // Message that stores parameters shared by loss layers
 message LossParameter {
  // If specified, ignore instances with the given label.
  optional int32 ignore_label = 1;
  // How to normalize the loss for loss layers that aggregate across batches,
  // spatial dimensions, or other dimensions.  Currently only implemented in
  // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers.
  enum NormalizationMode {
    // Divide by the number of examples in the batch times spatial dimensions.
    // Outputs that receive the ignore label will NOT be ignored in computing
    // the normalization factor.
    FULL = 0;
    // Divide by the total number of output locations that do not take the
    // ignore_label.  If ignore_label is not set, this behaves like FULL.
    VALID = 1;
    // Divide by the batch size.
    BATCH_SIZE = 2;
    // Do not normalize the loss.
    NONE = 3;
  }
  // For historical reasons, the default normalization for
  // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID.
  optional NormalizationMode normalization = 3 [default = VALID];
  // Deprecated.  Ignored if normalization is specified.  If normalization
  // is not specified, then setting this to false will be equivalent to
  // normalization = BATCH_SIZE to be consistent with previous behavior.
  optional bool normalize = 2;
 }

 // Messages that store parameters used by individual layer types follow, in
 // alphabetical order.

 message AccuracyParameter {
  // When computing accuracy, count as correct by comparing the true label to
  // the top k scoring classes.  By default, only compare to the top scoring
  // class (i.e. argmax).
  optional uint32 top_k = 1 [default = 1];

  // The "label" axis of the prediction blob, whose argmax corresponds to the
  // predicted label -- may be negative to index from the end (e.g., -1 for the
  // last axis).  For example, if axis == 1 and the predictions are
  // (N x C x H x W), the label blob is expected to contain N*H*W ground truth
  // labels with integer values in {0, 1, ..., C-1}.
  optional int32 axis = 2 [default = 1];

  // If specified, ignore instances with the given label.
  optional int32 ignore_label = 3;
 }

 message ArgMaxParameter {
  // If true produce pairs (argmax, maxval)
  optional bool out_max_val = 1 [default = false];
  optional uint32 top_k = 2 [default = 1];
  // The axis along which to maximise -- may be negative to index from the
  // end (e.g., -1 for the last axis).
  // By default ArgMaxLayer maximizes over the flattened trailing dimensions
  // for each index of the first / num dimension.
  optional int32 axis = 3;
 }

 message ConcatParameter {
  // The axis along which to concatenate -- may be negative to index from the
  // end (e.g., -1 for the last axis).  Other axes must have the
  // same dimension for all the bottom blobs.
  // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
  optional int32 axis = 2 [default = 1];

  // DEPRECATED: alias for "axis" -- does not support negative indexing.
  optional uint32 concat_dim = 1 [default = 1];
 }

 message BatchNormParameter {
  // If false, normalization is performed over the current mini-batch
  // and global statistics are accumulated (but not yet used) by a moving
  // average.
  // If true, those accumulated mean and variance values are used for the
  // normalization.
  // By default, it is set to false when the network is in the training
  // phase and true when the network is in the testing phase.
  optional bool use_global_stats = 1;
  // What fraction of the moving average remains each iteration?
  // Smaller values make the moving average decay faster, giving more
  // weight to the recent values.
  // Each iteration updates the moving average @f$S_{t-1}@f$ with the
  // current mean @f$ Y_t @f$ by
  // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$
  // is the moving_average_fraction parameter.
  optional float moving_average_fraction = 2 [default = .999];
  // Small value to add to the variance estimate so that we don't divide by
  // zero.
  optional float eps = 3 [default = 1e-5];
 }

 message BiasParameter {
  // The first axis of bottom[0] (the first input Blob) along which to apply
  // bottom[1] (the second input Blob).  May be negative to index from the end
  // (e.g., -1 for the last axis).
  //
  // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
  // top[0] will have the same shape, and bottom[1] may have any of the
  // following shapes (for the given value of axis):
  //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
  //    (axis == 1 == -3)          3;     3x40;     3x40x60
  //    (axis == 2 == -2)                   40;       40x60
  //    (axis == 3 == -1)                                60
  // Furthermore, bottom[1] may have the empty shape (regardless of the value of
  // "axis") -- a scalar bias.
  optional int32 axis = 1 [default = 1];

  // (num_axes is ignored unless just one bottom is given and the bias is
  // a learned parameter of the layer.  Otherwise, num_axes is determined by the
  // number of axes by the second bottom.)
  // The number of axes of the input (bottom[0]) covered by the bias
  // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
  // Set num_axes := 0, to add a zero-axis Blob: a scalar.
  optional int32 num_axes = 2 [default = 1];

  // (filler is ignored unless just one bottom is given and the bias is
  // a learned parameter of the layer.)
  // The initialization for the learned bias parameter.
  // Default is the zero (0) initialization, resulting in the BiasLayer
  // initially performing the identity operation.
  optional FillerParameter filler = 3;
 }

 message ContrastiveLossParameter {
  // margin for dissimilar pair
  optional float margin = 1 [default = 1.0];
  // The first implementation of this cost did not exactly match the cost of
  // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2.
  // legacy_version = false (the default) uses (margin - d)^2 as proposed in the
  // Hadsell paper. New models should probably use this version.
  // legacy_version = true uses (margin - d^2). This is kept to support /
  // reproduce existing models and results
  optional bool legacy_version = 2 [default = false];
 }

 message ConvolutionParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional bool bias_term = 2 [default = true]; // whether to have bias terms

  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in all spatial dimensions, or once per spatial dimension.
  repeated uint32 pad = 3; // The padding size; defaults to 0
  repeated uint32 kernel_size = 4; // The kernel size
  repeated uint32 stride = 6; // The stride; defaults to 1
  // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
  // holes. (Kernel dilation is sometimes referred to by its use in the
  // algorithme à trous from Holschneider et al. 1987.)
  repeated uint32 dilation = 18; // The dilation; defaults to 1

  // For 2D convolution only, the *_h and *_w versions may also be used to
  // specify both spatial dimensions.
  optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
  optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
  optional uint32 kernel_h = 11; // The kernel height (2D only)
  optional uint32 kernel_w = 12; // The kernel width (2D only)
  optional uint32 stride_h = 13; // The stride height (2D only)
  optional uint32 stride_w = 14; // The stride width (2D only)

  optional uint32 group = 5 [default = 1]; // The group size for group conv

  optional FillerParameter weight_filler = 7; // The filler for the weight
  optional FillerParameter bias_filler = 8; // The filler for the bias
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 15 [default = DEFAULT];

  // The axis to interpret as "channels" when performing convolution.
  // Preceding dimensions are treated as independent inputs;
  // succeeding dimensions are treated as "spatial".
  // With (N, C, H, W) inputs, and axis == 1 (the default), we perform
  // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for
  // groups g>1) filters across the spatial axes (H, W) of the input.
  // With (N, C, D, H, W) inputs, and axis == 1, we perform
  // N independent 3D convolutions, sliding (C/g)-channels
  // filters across the spatial axes (D, H, W) of the input.
  optional int32 axis = 16 [default = 1];

  // Whether to force use of the general ND convolution, even if a specific
  // implementation for blobs of the appropriate number of spatial dimensions
  // is available. (Currently, there is only a 2D-specific convolution
  // implementation; for input blobs with num_axes != 2, this option is
  // ignored and the ND implementation will be used.)
  optional bool force_nd_im2col = 17 [default = false];
 }

 message CropParameter {
  // To crop, elements of the first bottom are selected to fit the dimensions
  // of the second, reference bottom. The crop is configured by
  // - the crop `axis` to pick the dimensions for cropping
  // - the crop `offset` to set the shift for all/each dimension
  // to align the cropped bottom with the reference bottom.
  // All dimensions up to but excluding `axis` are preserved, while
  // the dimensions including and trailing `axis` are cropped.
  // If only one `offset` is set, then all dimensions are offset by this amount.
  // Otherwise, the number of offsets must equal the number of cropped axes to
  // shift the crop in each dimension accordingly.
  // Note: standard dimensions are N,C,H,W so the default is a spatial crop,
  // and `axis` may be negative to index from the end (e.g., -1 for the last
  // axis).
  optional int32 axis = 1 [default = 2];
  repeated uint32 offset = 2;
 }

 message DataParameter {
  enum DB {
    LEVELDB = 0;
    LMDB = 1;
  }
  // Specify the data source.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  // DEPRECATED. Each solver accesses a different subset of the database.
  optional uint32 rand_skip = 7 [default = 0];
  optional DB backend = 8 [default = LEVELDB];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  // Force the encoded image to have 3 color channels
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue (Increase if data feeding bandwidth varies, within the
  // limit of device memory for GPU training)
  optional uint32 prefetch = 10 [default = 4];
 }

 message DropoutParameter {
  optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
 }

 // DummyDataLayer fills any number of arbitrarily shaped blobs with random
 // (or constant) data generated by "Fillers" (see "message FillerParameter").
 message DummyDataParameter {
  // This layer produces N >= 1 top blobs.  DummyDataParameter must specify 1 or N
  // shape fields, and 0, 1 or N data_fillers.
  //
  // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used.
  // If 1 data_filler is specified, it is applied to all top blobs.  If N are
  // specified, the ith is applied to the ith top blob.
  repeated FillerParameter data_filler = 1;
  repeated BlobShape shape = 6;

  // 4D dimensions -- deprecated.  Use "shape" instead.
  repeated uint32 num = 2;
  repeated uint32 channels = 3;
  repeated uint32 height = 4;
  repeated uint32 width = 5;
 }

 message EltwiseParameter {
  enum EltwiseOp {
    PROD = 0;
    SUM = 1;
    MAX = 2;
  }
  optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
  repeated float coeff = 2; // blob-wise coefficient for SUM operation

  // Whether to use an asymptotically slower (for >2 inputs) but stabler method
  // of computing the gradient for the PROD operation. (No effect for SUM op.)
  optional bool stable_prod_grad = 3 [default = true];
 }

 // Message that stores parameters used by ELULayer
 message ELUParameter {
  // Described in:
  // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
  // Deep Network Learning by Exponential Linear Units (ELUs). arXiv
  optional float alpha = 1 [default = 1];
 }

 // Message that stores parameters used by EmbedLayer
 message EmbedParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  // The input is given as integers to be interpreted as one-hot
  // vector indices with dimension num_input.  Hence num_input should be
  // 1 greater than the maximum possible input value.
  optional uint32 input_dim = 2;

  optional bool bias_term = 3 [default = true]; // Whether to use a bias term
  optional FillerParameter weight_filler = 4; // The filler for the weight
  optional FillerParameter bias_filler = 5; // The filler for the bias

 }

 // Message that stores parameters used by ExpLayer
 message ExpParameter {
  // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
  // Or if base is set to the default (-1), base is set to e,
  // so y = exp(shift + scale * x).
  optional float base = 1 [default = -1.0];
  optional float scale = 2 [default = 1.0];
  optional float shift = 3 [default = 0.0];
 }

 /// Message that stores parameters used by FlattenLayer
 message FlattenParameter {
  // The first axis to flatten: all preceding axes are retained in the output.
  // May be negative to index from the end (e.g., -1 for the last axis).
  optional int32 axis = 1 [default = 1];

  // The last axis to flatten: all following axes are retained in the output.
  // May be negative to index from the end (e.g., the default -1 for the last
  // axis).
  optional int32 end_axis = 2 [default = -1];
 }

 // Message that stores parameters used by HDF5DataLayer
 message HDF5DataParameter {
  // Specify the data source.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 2;

  // Specify whether to shuffle the data.
  // If shuffle == true, the ordering of the HDF5 files is shuffled,
  // and the ordering of data within any given HDF5 file is shuffled,
  // but data between different files are not interleaved; all of a file's
  // data are output (in a random order) before moving onto another file.
  optional bool shuffle = 3 [default = false];
 }

 message HDF5OutputParameter {
  optional string file_name = 1;
 }

 message HingeLossParameter {
  enum Norm {
    L1 = 1;
    L2 = 2;
  }
  // Specify the Norm to use L1 or L2
  optional Norm norm = 1 [default = L1];
 }

 message ImageDataParameter {
  // Specify the data source.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4 [default = 1];
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  optional uint32 rand_skip = 7 [default = 0];
  // Whether or not ImageLayer should shuffle the list of files at every epoch.
  optional bool shuffle = 8 [default = false];
  // It will also resize images if new_height or new_width are not zero.
  optional uint32 new_height = 9 [default = 0];
  optional uint32 new_width = 10 [default = 0];
  // Specify if the images are color or gray
  optional bool is_color = 11 [default = true];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  optional string root_folder = 12 [default = ""];
 }

 message InfogainLossParameter {
  // Specify the infogain matrix source.
  optional string source = 1;
  optional int32 axis = 2 [default = 1]; // axis of prob
 }

 message InnerProductParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional bool bias_term = 2 [default = true]; // whether to have bias terms
  optional FillerParameter weight_filler = 3; // The filler for the weight
  optional FillerParameter bias_filler = 4; // The filler for the bias

  // The first axis to be lumped into a single inner product computation;
  // all preceding axes are retained in the output.
  // May be negative to index from the end (e.g., -1 for the last axis).
  optional int32 axis = 5 [default = 1];
  // Specify whether to transpose the weight matrix or not.
  // If transpose == true, any operations will be performed on the transpose
  // of the weight matrix. The weight matrix itself is not going to be transposed
  // but rather the transfer flag of operations will be toggled accordingly.
  optional bool transpose = 6 [default = false];
 }

 message InputParameter {
  // This layer produces N >= 1 top blob(s) to be assigned manually.
  // Define N shapes to set a shape for each top.
  // Define 1 shape to set the same shape for every top.
  // Define no shape to defer to reshaping manually.
  repeated BlobShape shape = 1;
 }

 // Message that stores parameters used by LogLayer
 message LogParameter {
  // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
  // Or if base is set to the default (-1), base is set to e,
  // so y = ln(shift + scale * x) = log_e(shift + scale * x)
  optional float base = 1 [default = -1.0];
  optional float scale = 2 [default = 1.0];
  optional float shift = 3 [default = 0.0];
 }

 // Message that stores parameters used by LRNLayer
 message LRNParameter {
  optional uint32 local_size = 1 [default = 5];
  optional float alpha = 2 [default = 1.];
  optional float beta = 3 [default = 0.75];
  enum NormRegion {
    ACROSS_CHANNELS = 0;
    WITHIN_CHANNEL = 1;
  }
  optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
  optional float k = 5 [default = 1.];
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 6 [default = DEFAULT];
 }

 message MemoryDataParameter {
  optional uint32 batch_size = 1;
  optional uint32 channels = 2;
  optional uint32 height = 3;
  optional uint32 width = 4;
 }

 message MVNParameter {
  // This parameter can be set to false to normalize mean only
  optional bool normalize_variance = 1 [default = true];

  // This parameter can be set to true to perform DNN-like MVN
  optional bool across_channels = 2 [default = false];

  // Epsilon for not dividing by zero while normalizing variance
  optional float eps = 3 [default = 1e-9];
 }

 message ParameterParameter {
  optional BlobShape shape = 1;
 }

 message PoolingParameter {
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional PoolMethod pool = 1 [default = MAX]; // The pooling method
  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in height and width or as Y, X pairs.
  optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
  optional uint32 pad_h = 9 [default = 0]; // The padding height
  optional uint32 pad_w = 10 [default = 0]; // The padding width
  optional uint32 kernel_size = 2; // The kernel size (square)
  optional uint32 kernel_h = 5; // The kernel height
  optional uint32 kernel_w = 6; // The kernel width
  optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
  optional uint32 stride_h = 7; // The stride height
  optional uint32 stride_w = 8; // The stride width
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 11 [default = DEFAULT];
  // If global_pooling then it will pool over the size of the bottom by doing
  // kernel_h = bottom->height and kernel_w = bottom->width
  optional bool global_pooling = 12 [default = false];
 }

 message PowerParameter {
  // PowerLayer computes outputs y = (shift + scale * x) ^ power.
  optional float power = 1 [default = 1.0];
  optional float scale = 2 [default = 1.0];
  optional float shift = 3 [default = 0.0];
 }

 message PythonParameter {
  optional string module = 1;
  optional string layer = 2;
  // This value is set to the attribute `param_str` of the `PythonLayer` object
  // in Python before calling the `setup()` method. This could be a number,
  // string, dictionary in Python dict format, JSON, etc. You may parse this
  // string in `setup` method and use it in `forward` and `backward`.
  optional string param_str = 3 [default = ''];
  // DEPRECATED
  optional bool share_in_parallel = 4 [default = false];
 }

 // Message that stores parameters used by RecurrentLayer
 message RecurrentParameter {
  // The dimension of the output (and usually hidden state) representation --
  // must be explicitly set to non-zero.
  optional uint32 num_output = 1 [default = 0];

  optional FillerParameter weight_filler = 2; // The filler for the weight
  optional FillerParameter bias_filler = 3; // The filler for the bias

  // Whether to enable displaying debug_info in the unrolled recurrent net.
  optional bool debug_info = 4 [default = false];

  // Whether to add as additional inputs (bottoms) the initial hidden state
  // blobs, and add as additional outputs (tops) the final timestep hidden state
  // blobs.  The number of additional bottom/top blobs required depends on the
  // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs.
  optional bool expose_hidden = 5 [default = false];
 }

 // Message that stores parameters used by ReductionLayer
 message ReductionParameter {
  enum ReductionOp {
    SUM = 1;
    ASUM = 2;
    SUMSQ = 3;
    MEAN = 4;
  }

  optional ReductionOp operation = 1 [default = SUM]; // reduction operation

  // The first axis to reduce to a scalar -- may be negative to index from the
  // end (e.g., -1 for the last axis).
  // (Currently, only reduction along ALL "tail" axes is supported; reduction
  // of axis M through N, where N < num_axes - 1, is unsupported.)
  // Suppose we have an n-axis bottom Blob with shape:
  //     (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)).
  // If axis == m, the output Blob will have shape
  //     (d0, d1, d2, ..., d(m-1)),
  // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
  // times, each including (dm * d(m+1) * ... * d(n-1)) individual data.
  // If axis == 0 (the default), the output Blob always has the empty shape
  // (count 1), performing reduction across the entire input --
  // often useful for creating new loss functions.
  optional int32 axis = 2 [default = 0];

  optional float coeff = 3 [default = 1.0]; // coefficient for output
 }

 // Message that stores parameters used by ReLULayer
 message ReLUParameter {
  // Allow non-zero slope for negative inputs to speed up optimization
  // Described in:
  // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
  // improve neural network acoustic models. In ICML Workshop on Deep Learning
  // for Audio, Speech, and Language Processing.
  optional float negative_slope = 1 [default = 0];
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 2 [default = DEFAULT];
 }

 message ReshapeParameter {
  // Specify the output dimensions. If some of the dimensions are set to 0,
  // the corresponding dimension from the bottom layer is used (unchanged).
  // Exactly one dimension may be set to -1, in which case its value is
  // inferred from the count of the bottom blob and the remaining dimensions.
  // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
  //
  //   layer {
  //     type: "Reshape" bottom: "input" top: "output"
  //     reshape_param { ... }
  //   }
  //
  // If "input" is 2D with shape 2 x 8, then the following reshape_param
  // specifications are all equivalent, producing a 3D blob "output" with shape
  // 2 x 2 x 4:
  //
  //   reshape_param { shape { dim:  2  dim: 2  dim:  4 } }
  //   reshape_param { shape { dim:  0  dim: 2  dim:  4 } }
  //   reshape_param { shape { dim:  0  dim: 2  dim: -1 } }
  //   reshape_param { shape { dim:  0  dim:-1  dim:  4 } }
  //
  optional BlobShape shape = 1;

  // axis and num_axes control the portion of the bottom blob's shape that are
  // replaced by (included in) the reshape. By default (axis == 0 and
  // num_axes == -1), the entire bottom blob shape is included in the reshape,
  // and hence the shape field must specify the entire output shape.
  //
  // axis may be non-zero to retain some portion of the beginning of the input
  // shape (and may be negative to index from the end; e.g., -1 to begin the
  // reshape after the last axis, including nothing in the reshape,
  // -2 to include only the last axis, etc.).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are all equivalent,
  // producing a blob "output" with shape 2 x 2 x 4:
  //
  //   reshape_param { shape { dim: 2  dim: 2  dim: 4 } }
  //   reshape_param { shape { dim: 2  dim: 4 } axis:  1 }
  //   reshape_param { shape { dim: 2  dim: 4 } axis: -3 }
  //
  // num_axes specifies the extent of the reshape.
  // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
  // input axes in the range [axis, axis+num_axes].
  // num_axes may also be -1, the default, to include all remaining axes
  // (starting from axis).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are equivalent,
  // producing a blob "output" with shape 1 x 2 x 8.
  //
  //   reshape_param { shape { dim:  1  dim: 2  dim:  8 } }
  //   reshape_param { shape { dim:  1  dim: 2  }  num_axes: 1 }
  //   reshape_param { shape { dim:  1  }  num_axes: 0 }
  //
  // On the other hand, these would produce output blob shape 2 x 1 x 8:
  //
  //   reshape_param { shape { dim: 2  dim: 1  dim: 8  }  }
  //   reshape_param { shape { dim: 1 }  axis: 1  num_axes: 0 }
  //
  optional int32 axis = 2 [default = 0];
  optional int32 num_axes = 3 [default = -1];
 }

 message ScaleParameter {
  // The first axis of bottom[0] (the first input Blob) along which to apply
  // bottom[1] (the second input Blob).  May be negative to index from the end
  // (e.g., -1 for the last axis).
  //
  // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
  // top[0] will have the same shape, and bottom[1] may have any of the
  // following shapes (for the given value of axis):
  //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
  //    (axis == 1 == -3)          3;     3x40;     3x40x60
  //    (axis == 2 == -2)                   40;       40x60
  //    (axis == 3 == -1)                                60
  // Furthermore, bottom[1] may have the empty shape (regardless of the value of
  // "axis") -- a scalar multiplier.
  optional int32 axis = 1 [default = 1];

  // (num_axes is ignored unless just one bottom is given and the scale is
  // a learned parameter of the layer.  Otherwise, num_axes is determined by the
  // number of axes by the second bottom.)
  // The number of axes of the input (bottom[0]) covered by the scale
  // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
  // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar.
  optional int32 num_axes = 2 [default = 1];

  // (filler is ignored unless just one bottom is given and the scale is
  // a learned parameter of the layer.)
  // The initialization for the learned scale parameter.
  // Default is the unit (1) initialization, resulting in the ScaleLayer
  // initially performing the identity operation.
  optional FillerParameter filler = 3;

  // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
  // may be more efficient).  Initialized with bias_filler (defaults to 0).
  optional bool bias_term = 4 [default = false];
  optional FillerParameter bias_filler = 5;
 }

 message SigmoidParameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 1 [default = DEFAULT];
 }

 message SliceParameter {
  // The axis along which to slice -- may be negative to index from the end
  // (e.g., -1 for the last axis).
  // By default, SliceLayer concatenates blobs along the "channels" axis (1).
  optional int32 axis = 3 [default = 1];
  repeated uint32 slice_point = 2;

  // DEPRECATED: alias for "axis" -- does not support negative indexing.
  optional uint32 slice_dim = 1 [default = 1];
 }

 // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
 message SoftmaxParameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 1 [default = DEFAULT];

  // The axis along which to perform the softmax -- may be negative to index
  // from the end (e.g., -1 for the last axis).
  // Any other axes will be evaluated as independent softmaxes.
  optional int32 axis = 2 [default = 1];
 }

 message TanHParameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 1 [default = DEFAULT];
 }

 // Message that stores parameters used by TileLayer
 message TileParameter {
  // The index of the axis to tile.
  optional int32 axis = 1 [default = 1];

  // The number of copies (tiles) of the blob to output.
  optional int32 tiles = 2;
 }

 // Message that stores parameters used by ThresholdLayer
 message ThresholdParameter {
  optional float threshold = 1 [default = 0]; // Strictly positive values
 }

 message WindowDataParameter {
  // Specify the data source.
  optional string source = 1;
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // Specify if we would like to randomly crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // Specify if we want to randomly mirror data.
  optional bool mirror = 6 [default = false];
  // Foreground (object) overlap threshold
  optional float fg_threshold = 7 [default = 0.5];
  // Background (non-object) overlap threshold
  optional float bg_threshold = 8 [default = 0.5];
  // Fraction of batch that should be foreground objects
  optional float fg_fraction = 9 [default = 0.25];
  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer)
  optional uint32 context_pad = 10 [default = 0];
  // Mode for cropping out a detection window
  // warp: cropped window is warped to a fixed size and aspect ratio
  // square: the tightest square around the window is cropped
  optional string crop_mode = 11 [default = "warp"];
  // cache_images: will load all images in memory for faster access
  optional bool cache_images = 12 [default = false];
  // append root_folder to locate images
  optional string root_folder = 13 [default = ""];
 }

 message SPPParameter {
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional uint32 pyramid_height = 1;
  optional PoolMethod pool = 2 [default = MAX]; // The pooling method
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 6 [default = DEFAULT];
 }

 // DEPRECATED: use LayerParameter.
 message V1LayerParameter {
  repeated string bottom = 2;
  repeated string top = 3;
  optional string name = 4;
  repeated NetStateRule include = 32;
  repeated NetStateRule exclude = 33;
  enum LayerType {
    NONE = 0;
    ABSVAL = 35;
    ACCURACY = 1;
    ARGMAX = 30;
    BNLL = 2;
    CONCAT = 3;
    CONTRASTIVE_LOSS = 37;
    CONVOLUTION = 4;
    DATA = 5;
    DECONVOLUTION = 39;
    DROPOUT = 6;
    DUMMY_DATA = 32;
    EUCLIDEAN_LOSS = 7;
    ELTWISE = 25;
    EXP = 38;
    FLATTEN = 8;
    HDF5_DATA = 9;
    HDF5_OUTPUT = 10;
    HINGE_LOSS = 28;
    IM2COL = 11;
    IMAGE_DATA = 12;
    INFOGAIN_LOSS = 13;
    INNER_PRODUCT = 14;
    LRN = 15;
    MEMORY_DATA = 29;
    MULTINOMIAL_LOGISTIC_LOSS = 16;
    MVN = 34;
    POOLING = 17;
    POWER = 26;
    RELU = 18;
    SIGMOID = 19;
    SIGMOID_CROSS_ENTROPY_LOSS = 27;
    SILENCE = 36;
    SOFTMAX = 20;
    SOFTMAX_LOSS = 21;
    SPLIT = 22;
    SLICE = 33;
    TANH = 23;
    WINDOW_DATA = 24;
    THRESHOLD = 31;
  }
  optional LayerType type = 5;
  repeated BlobProto blobs = 6;
  repeated string param = 1001;
  repeated DimCheckMode blob_share_mode = 1002;
  enum DimCheckMode {
    STRICT = 0;
    PERMISSIVE = 1;
  }
  repeated float blobs_lr = 7;
  repeated float weight_decay = 8;
  repeated float loss_weight = 35;
  optional AccuracyParameter accuracy_param = 27;
  optional ArgMaxParameter argmax_param = 23;
  optional ConcatParameter concat_param = 9;
  optional ContrastiveLossParameter contrastive_loss_param = 40;
  optional ConvolutionParameter convolution_param = 10;
  optional DataParameter data_param = 11;
  optional DropoutParameter dropout_param = 12;
  optional DummyDataParameter dummy_data_param = 26;
  optional EltwiseParameter eltwise_param = 24;
  optional ExpParameter exp_param = 41;
  optional HDF5DataParameter hdf5_data_param = 13;
  optional HDF5OutputParameter hdf5_output_param = 14;
  optional HingeLossParameter hinge_loss_param = 29;
  optional ImageDataParameter image_data_param = 15;
  optional InfogainLossParameter infogain_loss_param = 16;
  optional InnerProductParameter inner_product_param = 17;
  optional LRNParameter lrn_param = 18;
  optional MemoryDataParameter memory_data_param = 22;
  optional MVNParameter mvn_param = 34;
  optional PoolingParameter pooling_param = 19;
  optional PowerParameter power_param = 21;
  optional ReLUParameter relu_param = 30;
  optional SigmoidParameter sigmoid_param = 38;
  optional SoftmaxParameter softmax_param = 39;
  optional SliceParameter slice_param = 31;
  optional TanHParameter tanh_param = 37;
  optional ThresholdParameter threshold_param = 25;
  optional WindowDataParameter window_data_param = 20;
  optional TransformationParameter transform_param = 36;
  optional LossParameter loss_param = 42;
  optional V0LayerParameter layer = 1;
 }

 // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
 // in Caffe.  We keep this message type around for legacy support.
 message V0LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the string to specify the layer type

  // Parameters to specify layers with inner products.
  optional uint32 num_output = 3; // The number of outputs for the layer
  optional bool biasterm = 4 [default = true]; // whether to have bias terms
  optional FillerParameter weight_filler = 5; // The filler for the weight
  optional FillerParameter bias_filler = 6; // The filler for the bias

  optional uint32 pad = 7 [default = 0]; // The padding size
  optional uint32 kernelsize = 8; // The kernel size
  optional uint32 group = 9 [default = 1]; // The group size for group conv
  optional uint32 stride = 10 [default = 1]; // The stride
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional PoolMethod pool = 11 [default = MAX]; // The pooling method
  optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio

  optional uint32 local_size = 13 [default = 5]; // for local response norm
  optional float alpha = 14 [default = 1.]; // for local response norm
  optional float beta = 15 [default = 0.75]; // for local response norm
  optional float k = 22 [default = 1.];

  // For data layers, specify the data source
  optional string source = 16;
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 17 [default = 1];
  optional string meanfile = 18;
  // For data layers, specify the batch size.
  optional uint32 batchsize = 19;
  // For data layers, specify if we would like to randomly crop an image.
  optional uint32 cropsize = 20 [default = 0];
  // For data layers, specify if we want to randomly mirror data.
  optional bool mirror = 21 [default = false];

  // The blobs containing the numeric parameters of the layer
  repeated BlobProto blobs = 50;
  // The ratio that is multiplied on the global learning rate. If you want to
  // set the learning ratio for one blob, you need to set it for all blobs.
  repeated float blobs_lr = 51;
  // The weight decay that is multiplied on the global weight decay.
  repeated float weight_decay = 52;

  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  optional uint32 rand_skip = 53 [default = 0];

  // Fields related to detection (det_*)
  // foreground (object) overlap threshold
  optional float det_fg_threshold = 54 [default = 0.5];
  // background (non-object) overlap threshold
  optional float det_bg_threshold = 55 [default = 0.5];
  // Fraction of batch that should be foreground objects
  optional float det_fg_fraction = 56 [default = 0.25];

  // optional bool OBSOLETE_can_clobber = 57 [default = true];

  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer)
  optional uint32 det_context_pad = 58 [default = 0];

  // Mode for cropping out a detection window
  // warp: cropped window is warped to a fixed size and aspect ratio
  // square: the tightest square around the window is cropped
  optional string det_crop_mode = 59 [default = "warp"];

  // For ReshapeLayer, one needs to specify the new dimensions.
  optional int32 new_num = 60 [default = 0];
  optional int32 new_channels = 61 [default = 0];
  optional int32 new_height = 62 [default = 0];
  optional int32 new_width = 63 [default = 0];

  // Whether or not ImageLayer should shuffle the list of files at every epoch.
  // It will also resize images if new_height or new_width are not zero.
  optional bool shuffle_images = 64 [default = false];

  // For ConcatLayer, one needs to specify the dimension for concatenation, and
  // the other dimensions must be the same for all the bottom blobs.
  // By default it will concatenate blobs along the channels dimension.
  optional uint32 concat_dim = 65 [default = 1];

  optional HDF5OutputParameter hdf5_output_param = 1001;
 }

 message PReLUParameter {
  // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers:
  // Surpassing Human-Level Performance on ImageNet Classification, 2015.

  // Initial value of a_i. Default is a_i=0.25 for all i.
  optional FillerParameter filler = 1;
  // Whether or not slope parameters are shared across channels.
  optional bool channel_shared = 2 [default = false];
 }
diff --git a/Makefile b/Makefile
 PROJECT := caffe

 CONFIG_FILE := Makefile.config
 # Explicitly check for the config file, otherwise make -k will proceed anyway.
 ifeq ($(wildcard $(CONFIG_FILE)),)
 $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example.)
 endif
 include $(CONFIG_FILE)

 BUILD_DIR_LINK := $(BUILD_DIR)
 ifeq ($(RELEASE_BUILD_DIR),)
 	RELEASE_BUILD_DIR := .$(BUILD_DIR)_release
 endif
 ifeq ($(DEBUG_BUILD_DIR),)
 	DEBUG_BUILD_DIR := .$(BUILD_DIR)_debug
 endif

 DEBUG ?= 0
 ifeq ($(DEBUG), 1)
 	BUILD_DIR := $(DEBUG_BUILD_DIR)
 	OTHER_BUILD_DIR := $(RELEASE_BUILD_DIR)
 else
 	BUILD_DIR := $(RELEASE_BUILD_DIR)
 	OTHER_BUILD_DIR := $(DEBUG_BUILD_DIR)
 endif

 # All of the directories containing code.
 SRC_DIRS := $(shell find * -type d -exec bash -c "find {} -maxdepth 1 \
 	\( -name '*.cpp' -o -name '*.proto' \) | grep -q ." \; -print)

 # The target shared library name
 LIBRARY_NAME := $(PROJECT)
 LIB_BUILD_DIR := $(BUILD_DIR)/lib
 STATIC_NAME := $(LIB_BUILD_DIR)/lib$(LIBRARY_NAME).a
 DYNAMIC_VERSION_MAJOR 		:= 1
 DYNAMIC_VERSION_MINOR 		:= 0
 DYNAMIC_VERSION_REVISION 	:= 0
 DYNAMIC_NAME_SHORT := lib$(LIBRARY_NAME).so
 #DYNAMIC_SONAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR)
 DYNAMIC_VERSIONED_NAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR).$(DYNAMIC_VERSION_MINOR).$(DYNAMIC_VERSION_REVISION)
 DYNAMIC_NAME := $(LIB_BUILD_DIR)/$(DYNAMIC_VERSIONED_NAME_SHORT)
 COMMON_FLAGS += -DCAFFE_VERSION=$(DYNAMIC_VERSION_MAJOR).$(DYNAMIC_VERSION_MINOR).$(DYNAMIC_VERSION_REVISION)

 ##############################
 # Get all source files
 ##############################
 # CXX_SRCS are the source files excluding the test ones.
 CXX_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cpp" -name "*.cpp")
 # CU_SRCS are the cuda source files
 CU_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cu" -name "*.cu")
 # TEST_SRCS are the test source files
 TEST_MAIN_SRC := src/$(PROJECT)/test/test_caffe_main.cpp
 TEST_SRCS := $(shell find src/$(PROJECT) -name "test_*.cpp")
 TEST_SRCS := $(filter-out $(TEST_MAIN_SRC), $(TEST_SRCS))
 TEST_CU_SRCS := $(shell find src/$(PROJECT) -name "test_*.cu")
 GTEST_SRC := src/gtest/gtest-all.cpp
 # TOOL_SRCS are the source files for the tool binaries
 TOOL_SRCS := $(shell find tools -name "*.cpp")
 # EXAMPLE_SRCS are the source files for the example binaries
 EXAMPLE_SRCS := $(shell find examples -name "*.cpp")
 # BUILD_INCLUDE_DIR contains any generated header files we want to include.
 BUILD_INCLUDE_DIR := $(BUILD_DIR)/src
 # PROTO_SRCS are the protocol buffer definitions
 PROTO_SRC_DIR := src/$(PROJECT)/proto
 PROTO_SRCS := $(wildcard $(PROTO_SRC_DIR)/*.proto)
 # PROTO_BUILD_DIR will contain the .cc and obj files generated from
 # PROTO_SRCS; PROTO_BUILD_INCLUDE_DIR will contain the .h header files
 PROTO_BUILD_DIR := $(BUILD_DIR)/$(PROTO_SRC_DIR)
 PROTO_BUILD_INCLUDE_DIR := $(BUILD_INCLUDE_DIR)/$(PROJECT)/proto
 # NONGEN_CXX_SRCS includes all source/header files except those generated
 # automatically (e.g., by proto).
 NONGEN_CXX_SRCS := $(shell find \
 	src/$(PROJECT) \
 	include/$(PROJECT) \
 	python/$(PROJECT) \
 	matlab/+$(PROJECT)/private \
 	examples \
 	tools \
 	-name "*.cpp" -or -name "*.hpp" -or -name "*.cu" -or -name "*.cuh")
 LINT_SCRIPT := scripts/cpp_lint.py
 LINT_OUTPUT_DIR := $(BUILD_DIR)/.lint
 LINT_EXT := lint.txt
 LINT_OUTPUTS := $(addsuffix .$(LINT_EXT), $(addprefix $(LINT_OUTPUT_DIR)/, $(NONGEN_CXX_SRCS)))
 EMPTY_LINT_REPORT := $(BUILD_DIR)/.$(LINT_EXT)
 NONEMPTY_LINT_REPORT := $(BUILD_DIR)/$(LINT_EXT)
 # PY$(PROJECT)_SRC is the python wrapper for $(PROJECT)
 PY$(PROJECT)_SRC := python/$(PROJECT)/_$(PROJECT).cpp
 PY$(PROJECT)_SO := python/$(PROJECT)/_$(PROJECT).so
 PY$(PROJECT)_HXX := include/$(PROJECT)/layers/python_layer.hpp
 # MAT$(PROJECT)_SRC is the mex entrance point of matlab package for $(PROJECT)
 MAT$(PROJECT)_SRC := matlab/+$(PROJECT)/private/$(PROJECT)_.cpp
 ifneq ($(MATLAB_DIR),)
 	MAT_SO_EXT := $(shell $(MATLAB_DIR)/bin/mexext)
 endif
 MAT$(PROJECT)_SO := matlab/+$(PROJECT)/private/$(PROJECT)_.$(MAT_SO_EXT)

 ##############################
 # Derive generated files
 ##############################
 # The generated files for protocol buffers
 PROTO_GEN_HEADER_SRCS := $(addprefix $(PROTO_BUILD_DIR)/, \
 		$(notdir ${PROTO_SRCS:.proto=.pb.h}))
 PROTO_GEN_HEADER := $(addprefix $(PROTO_BUILD_INCLUDE_DIR)/, \
 		$(notdir ${PROTO_SRCS:.proto=.pb.h}))
 PROTO_GEN_CC := $(addprefix $(BUILD_DIR)/, ${PROTO_SRCS:.proto=.pb.cc})
 PY_PROTO_BUILD_DIR := python/$(PROJECT)/proto
 PY_PROTO_INIT := python/$(PROJECT)/proto/__init__.py
 PROTO_GEN_PY := $(foreach file,${PROTO_SRCS:.proto=_pb2.py}, \
 		$(PY_PROTO_BUILD_DIR)/$(notdir $(file)))
 # The objects corresponding to the source files
 # These objects will be linked into the final shared library, so we
 # exclude the tool, example, and test objects.
 CXX_OBJS := $(addprefix $(BUILD_DIR)/, ${CXX_SRCS:.cpp=.o})
 CU_OBJS := $(addprefix $(BUILD_DIR)/cuda/, ${CU_SRCS:.cu=.o})
 PROTO_OBJS := ${PROTO_GEN_CC:.cc=.o}
 OBJS := $(PROTO_OBJS) $(CXX_OBJS) $(CU_OBJS)
 # tool, example, and test objects
 TOOL_OBJS := $(addprefix $(BUILD_DIR)/, ${TOOL_SRCS:.cpp=.o})
 TOOL_BUILD_DIR := $(BUILD_DIR)/tools
 TEST_CXX_BUILD_DIR := $(BUILD_DIR)/src/$(PROJECT)/test
 TEST_CU_BUILD_DIR := $(BUILD_DIR)/cuda/src/$(PROJECT)/test
 TEST_CXX_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o})
 TEST_CU_OBJS := $(addprefix $(BUILD_DIR)/cuda/, ${TEST_CU_SRCS:.cu=.o})
 TEST_OBJS := $(TEST_CXX_OBJS) $(TEST_CU_OBJS)
 GTEST_OBJ := $(addprefix $(BUILD_DIR)/, ${GTEST_SRC:.cpp=.o})
 EXAMPLE_OBJS := $(addprefix $(BUILD_DIR)/, ${EXAMPLE_SRCS:.cpp=.o})
 # Output files for automatic dependency generation
 DEPS := ${CXX_OBJS:.o=.d} ${CU_OBJS:.o=.d} ${TEST_CXX_OBJS:.o=.d} \
 	${TEST_CU_OBJS:.o=.d} $(BUILD_DIR)/${MAT$(PROJECT)_SO:.$(MAT_SO_EXT)=.d}
 # tool, example, and test bins
 TOOL_BINS := ${TOOL_OBJS:.o=.bin}
 EXAMPLE_BINS := ${EXAMPLE_OBJS:.o=.bin}
 # symlinks to tool bins without the ".bin" extension
 TOOL_BIN_LINKS := ${TOOL_BINS:.bin=}
 # Put the test binaries in build/test for convenience.
 TEST_BIN_DIR := $(BUILD_DIR)/test
 TEST_CU_BINS := $(addsuffix .testbin,$(addprefix $(TEST_BIN_DIR)/, \
 		$(foreach obj,$(TEST_CU_OBJS),$(basename $(notdir $(obj))))))
 TEST_CXX_BINS := $(addsuffix .testbin,$(addprefix $(TEST_BIN_DIR)/, \
 		$(foreach obj,$(TEST_CXX_OBJS),$(basename $(notdir $(obj))))))
 TEST_BINS := $(TEST_CXX_BINS) $(TEST_CU_BINS)
 # TEST_ALL_BIN is the test binary that links caffe dynamically.
 TEST_ALL_BIN := $(TEST_BIN_DIR)/test_all.testbin

 ##############################
 # Derive compiler warning dump locations
 ##############################
 WARNS_EXT := warnings.txt
 CXX_WARNS := $(addprefix $(BUILD_DIR)/, ${CXX_SRCS:.cpp=.o.$(WARNS_EXT)})
 CU_WARNS := $(addprefix $(BUILD_DIR)/cuda/, ${CU_SRCS:.cu=.o.$(WARNS_EXT)})
 TOOL_WARNS := $(addprefix $(BUILD_DIR)/, ${TOOL_SRCS:.cpp=.o.$(WARNS_EXT)})
 EXAMPLE_WARNS := $(addprefix $(BUILD_DIR)/, ${EXAMPLE_SRCS:.cpp=.o.$(WARNS_EXT)})
 TEST_WARNS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o.$(WARNS_EXT)})
 TEST_CU_WARNS := $(addprefix $(BUILD_DIR)/cuda/, ${TEST_CU_SRCS:.cu=.o.$(WARNS_EXT)})
 ALL_CXX_WARNS := $(CXX_WARNS) $(TOOL_WARNS) $(EXAMPLE_WARNS) $(TEST_WARNS)
 ALL_CU_WARNS := $(CU_WARNS) $(TEST_CU_WARNS)
 ALL_WARNS := $(ALL_CXX_WARNS) $(ALL_CU_WARNS)

 EMPTY_WARN_REPORT := $(BUILD_DIR)/.$(WARNS_EXT)
 NONEMPTY_WARN_REPORT := $(BUILD_DIR)/$(WARNS_EXT)

 ##############################
 # Derive include and lib directories
 ##############################
 CUDA_INCLUDE_DIR := $(CUDA_DIR)/include

 CUDA_LIB_DIR :=
 # add <cuda>/lib64 only if it exists
 ifneq ("$(wildcard $(CUDA_DIR)/lib64)","")
 	CUDA_LIB_DIR += $(CUDA_DIR)/lib64
 endif
 CUDA_LIB_DIR += $(CUDA_DIR)/lib

 INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include
 ifneq ($(CPU_ONLY), 1)
 	INCLUDE_DIRS += $(CUDA_INCLUDE_DIR)
 	LIBRARY_DIRS += $(CUDA_LIB_DIR)
 	LIBRARIES := cudart cublas curand
 endif

 LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_hl hdf5

 # handle IO dependencies
 USE_LEVELDB ?= 1
 USE_LMDB ?= 1
 USE_OPENCV ?= 1

 ifeq ($(USE_LEVELDB), 1)
 	LIBRARIES += leveldb snappy
 endif
 ifeq ($(USE_LMDB), 1)
 	LIBRARIES += lmdb
 endif
 ifeq ($(USE_OPENCV), 1)
 	LIBRARIES += opencv_core opencv_highgui opencv_imgproc

 	ifeq ($(OPENCV_VERSION), 3)
 		LIBRARIES += opencv_imgcodecs
 	endif

 endif
 PYTHON_LIBRARIES ?= boost_python python2.7
 WARNINGS := -Wall -Wno-sign-compare

 ##############################
 # Set build directories
 ##############################

 DISTRIBUTE_DIR ?= distribute
 DISTRIBUTE_SUBDIRS := $(DISTRIBUTE_DIR)/bin $(DISTRIBUTE_DIR)/lib
 DIST_ALIASES := dist
 ifneq ($(strip $(DISTRIBUTE_DIR)),distribute)
 		DIST_ALIASES += distribute
 endif

 ALL_BUILD_DIRS := $(sort $(BUILD_DIR) $(addprefix $(BUILD_DIR)/, $(SRC_DIRS)) \
 	$(addprefix $(BUILD_DIR)/cuda/, $(SRC_DIRS)) \
 	$(LIB_BUILD_DIR) $(TEST_BIN_DIR) $(PY_PROTO_BUILD_DIR) $(LINT_OUTPUT_DIR) \
 	$(DISTRIBUTE_SUBDIRS) $(PROTO_BUILD_INCLUDE_DIR))

 ##############################
 # Set directory for Doxygen-generated documentation
 ##############################
 DOXYGEN_CONFIG_FILE ?= ./.Doxyfile
 # should be the same as OUTPUT_DIRECTORY in the .Doxyfile
 DOXYGEN_OUTPUT_DIR ?= ./doxygen
 DOXYGEN_COMMAND ?= doxygen
 # All the files that might have Doxygen documentation.
 DOXYGEN_SOURCES := $(shell find \
 	src/$(PROJECT) \
 	include/$(PROJECT) \
 	python/ \
 	matlab/ \
 	examples \
 	tools \
 	-name "*.cpp" -or -name "*.hpp" -or -name "*.cu" -or -name "*.cuh" -or \
        -name "*.py" -or -name "*.m")
 DOXYGEN_SOURCES += $(DOXYGEN_CONFIG_FILE)


 ##############################
 # Configure build
 ##############################

 # Determine platform
 UNAME := $(shell uname -s)
 ifeq ($(UNAME), Linux)
 	LINUX := 1
 else ifeq ($(UNAME), Darwin)
 	OSX := 1
 	OSX_MAJOR_VERSION := $(shell sw_vers -productVersion | cut -f 1 -d .)
 	OSX_MINOR_VERSION := $(shell sw_vers -productVersion | cut -f 2 -d .)
 endif

 # Linux
 ifeq ($(LINUX), 1)
 	CXX ?= /usr/bin/g++
 	GCCVERSION := $(shell $(CXX) -dumpversion | cut -f1,2 -d.)
 	# older versions of gcc are too dumb to build boost with -Wuninitalized
 	ifeq ($(shell echo | awk '{exit $(GCCVERSION) < 4.6;}'), 1)
 		WARNINGS += -Wno-uninitialized
 	endif
 	# boost::thread is reasonably called boost_thread (compare OS X)
 	# We will also explicitly add stdc++ to the link target.
 	LIBRARIES += boost_thread stdc++
 	VERSIONFLAGS += -Wl,-soname,$(DYNAMIC_VERSIONED_NAME_SHORT) -Wl,-rpath,$(ORIGIN)/../lib
 endif

 # OS X:
 # clang++ instead of g++
 # libstdc++ for NVCC compatibility on OS X >= 10.9 with CUDA < 7.0
 ifeq ($(OSX), 1)
 	CXX := /usr/bin/clang++
 	ifneq ($(CPU_ONLY), 1)
 		CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release [0-9.]*' | tr -d '[a-z ]')
 		ifeq ($(shell echo | awk '{exit $(CUDA_VERSION) < 7.0;}'), 1)
 			CXXFLAGS += -stdlib=libstdc++
 			LINKFLAGS += -stdlib=libstdc++
 		endif
 		# clang throws this warning for cuda headers
 		WARNINGS += -Wno-unneeded-internal-declaration
 		# 10.11 strips DYLD_* env vars so link CUDA (rpath is available on 10.5+)
 		OSX_10_OR_LATER   := $(shell [ $(OSX_MAJOR_VERSION) -ge 10 ] && echo true)
 		OSX_10_5_OR_LATER := $(shell [ $(OSX_MINOR_VERSION) -ge 5 ] && echo true)
 		ifeq ($(OSX_10_OR_LATER),true)
 			ifeq ($(OSX_10_5_OR_LATER),true)
 				LDFLAGS += -Wl,-rpath,$(CUDA_LIB_DIR)
 			endif
 		endif
 	endif
 	# gtest needs to use its own tuple to not conflict with clang
 	COMMON_FLAGS += -DGTEST_USE_OWN_TR1_TUPLE=1
 	# boost::thread is called boost_thread-mt to mark multithreading on OS X
 	LIBRARIES += boost_thread-mt
 	# we need to explicitly ask for the rpath to be obeyed
 	ORIGIN := @loader_path
 	VERSIONFLAGS += -Wl,-install_name,@rpath/$(DYNAMIC_VERSIONED_NAME_SHORT) -Wl,-rpath,$(ORIGIN)/../../build/lib
 else
 	ORIGIN := \$$ORIGIN
 endif

 # Custom compiler
 ifdef CUSTOM_CXX
 	CXX := $(CUSTOM_CXX)
 endif

 # Static linking
 ifneq (,$(findstring clang++,$(CXX)))
 	STATIC_LINK_COMMAND := -Wl,-force_load $(STATIC_NAME)
 else ifneq (,$(findstring g++,$(CXX)))
 	STATIC_LINK_COMMAND := -Wl,--whole-archive $(STATIC_NAME) -Wl,--no-whole-archive
 else
  # The following line must not be indented with a tab, since we are not inside a target
  $(error Cannot static link with the $(CXX) compiler)
 endif

 # Debugging
 ifeq ($(DEBUG), 1)
 	COMMON_FLAGS += -DDEBUG -g -O0
 	NVCCFLAGS += -G
 else
 	COMMON_FLAGS += -DNDEBUG -O2
 endif

 # cuDNN acceleration configuration.
 ifeq ($(USE_CUDNN), 1)
 	LIBRARIES += cudnn
 	COMMON_FLAGS += -DUSE_CUDNN
 endif

 # NCCL acceleration configuration
 ifeq ($(USE_NCCL), 1)
 	LIBRARIES += nccl
 	COMMON_FLAGS += -DUSE_NCCL
 endif

 # configure IO libraries
 ifeq ($(USE_OPENCV), 1)
 	COMMON_FLAGS += -DUSE_OPENCV
 endif
 ifeq ($(USE_LEVELDB), 1)
 	COMMON_FLAGS += -DUSE_LEVELDB
 endif
 ifeq ($(USE_LMDB), 1)
 	COMMON_FLAGS += -DUSE_LMDB
 ifeq ($(ALLOW_LMDB_NOLOCK), 1)
 	COMMON_FLAGS += -DALLOW_LMDB_NOLOCK
 endif
 endif

 # CPU-only configuration
 ifeq ($(CPU_ONLY), 1)
 	OBJS := $(PROTO_OBJS) $(CXX_OBJS)
 	TEST_OBJS := $(TEST_CXX_OBJS)
 	TEST_BINS := $(TEST_CXX_BINS)
 	ALL_WARNS := $(ALL_CXX_WARNS)
 	TEST_FILTER := --gtest_filter="-*GPU*"
 	COMMON_FLAGS += -DCPU_ONLY
 endif

 # Python layer support
 ifeq ($(WITH_PYTHON_LAYER), 1)
 	COMMON_FLAGS += -DWITH_PYTHON_LAYER
 	LIBRARIES += $(PYTHON_LIBRARIES)
 endif

 # BLAS configuration (default = ATLAS)
 BLAS ?= atlas
 ifeq ($(BLAS), mkl)
 	# MKL
 	LIBRARIES += mkl_rt
 	COMMON_FLAGS += -DUSE_MKL
 	MKLROOT ?= /opt/intel/mkl
 	BLAS_INCLUDE ?= $(MKLROOT)/include
 	BLAS_LIB ?= $(MKLROOT)/lib $(MKLROOT)/lib/intel64
 else ifeq ($(BLAS), open)
 	# OpenBLAS
 	LIBRARIES += openblas
 else
 	# ATLAS
 	ifeq ($(LINUX), 1)
 		ifeq ($(BLAS), atlas)
 			# Linux simply has cblas and atlas
 			LIBRARIES += cblas atlas
 		endif
 	else ifeq ($(OSX), 1)
 		# OS X packages atlas as the vecLib framework
 		LIBRARIES += cblas
 		# 10.10 has accelerate while 10.9 has veclib
 		XCODE_CLT_VER := $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables | grep 'version' | sed 's/[^0-9]*\([0-9]\).*/\1/')
 		XCODE_CLT_GEQ_7 := $(shell [ $(XCODE_CLT_VER) -gt 6 ] && echo 1)
 		XCODE_CLT_GEQ_6 := $(shell [ $(XCODE_CLT_VER) -gt 5 ] && echo 1)
 		ifeq ($(XCODE_CLT_GEQ_7), 1)
 			BLAS_INCLUDE ?= /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/$(shell ls /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/ | sort | tail -1)/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers
 		else ifeq ($(XCODE_CLT_GEQ_6), 1)
 			BLAS_INCLUDE ?= /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/
 			LDFLAGS += -framework Accelerate
 		else
 			BLAS_INCLUDE ?= /System/Library/Frameworks/vecLib.framework/Versions/Current/Headers/
 			LDFLAGS += -framework vecLib
 		endif
 	endif
 endif
 INCLUDE_DIRS += $(BLAS_INCLUDE)
 LIBRARY_DIRS += $(BLAS_LIB)

 LIBRARY_DIRS += $(LIB_BUILD_DIR)

 # Automatic dependency generation (nvcc is handled separately)
 CXXFLAGS += -MMD -MP

 # Complete build flags.
 COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
 CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)
 NVCCFLAGS += -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
 # mex may invoke an older gcc that is too liberal with -Wuninitalized
 MATLAB_CXXFLAGS := $(CXXFLAGS) -Wno-uninitialized
 LINKFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)

 USE_PKG_CONFIG ?= 0
 ifeq ($(USE_PKG_CONFIG), 1)
 	PKG_CONFIG := $(shell pkg-config opencv --libs)
 else
 	PKG_CONFIG :=
 endif

 LIBRARIES += restclient-cpp curl

 LDFLAGS += $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) $(PKG_CONFIG) \
 		$(foreach library,$(LIBRARIES),-l$(library))
 PYTHON_LDFLAGS := $(LDFLAGS) $(foreach library,$(PYTHON_LIBRARIES),-l$(library))

 # 'superclean' target recursively* deletes all files ending with an extension
 # in $(SUPERCLEAN_EXTS) below.  This may be useful if you've built older
 # versions of Caffe that do not place all generated files in a location known
 # to the 'clean' target.
 #
 # 'supercleanlist' will list the files to be deleted by make superclean.
 #
 # * Recursive with the exception that symbolic links are never followed, per the
 # default behavior of 'find'.
 SUPERCLEAN_EXTS := .so .a .o .bin .testbin .pb.cc .pb.h _pb2.py .cuo

 # Set the sub-targets of the 'everything' target.
 EVERYTHING_TARGETS := all py$(PROJECT) test warn lint
 # Only build matcaffe as part of "everything" if MATLAB_DIR is specified.
 ifneq ($(MATLAB_DIR),)
 	EVERYTHING_TARGETS += mat$(PROJECT)
 endif

 ##############################
 # Define build targets
 ##############################
 .PHONY: all lib test clean docs linecount lint lintclean tools examples $(DIST_ALIASES) \
 	py mat py$(PROJECT) mat$(PROJECT) proto runtest \
 	superclean supercleanlist supercleanfiles warn everything

 all: lib tools examples

 lib: $(STATIC_NAME) $(DYNAMIC_NAME)

 everything: $(EVERYTHING_TARGETS)

 linecount:
 	cloc --read-lang-def=$(PROJECT).cloc \
 		src/$(PROJECT) include/$(PROJECT) tools examples \
 		python matlab

 lint: $(EMPTY_LINT_REPORT)

 lintclean:
 	@ $(RM) -r $(LINT_OUTPUT_DIR) $(EMPTY_LINT_REPORT) $(NONEMPTY_LINT_REPORT)

 docs: $(DOXYGEN_OUTPUT_DIR)
 	@ cd ./docs ; ln -sfn ../$(DOXYGEN_OUTPUT_DIR)/html doxygen

 $(DOXYGEN_OUTPUT_DIR): $(DOXYGEN_CONFIG_FILE) $(DOXYGEN_SOURCES)
 	$(DOXYGEN_COMMAND) $(DOXYGEN_CONFIG_FILE)

 $(EMPTY_LINT_REPORT): $(LINT_OUTPUTS) | $(BUILD_DIR)
 	@ cat $(LINT_OUTPUTS) > $@
 	@ if [ -s "$@" ]; then \
 		cat $@; \
 		mv $@ $(NONEMPTY_LINT_REPORT); \
 		echo "Found one or more lint errors."; \
 		exit 1; \
 	  fi; \
 	  $(RM) $(NONEMPTY_LINT_REPORT); \
 	  echo "No lint errors!";

 $(LINT_OUTPUTS): $(LINT_OUTPUT_DIR)/%.lint.txt : % $(LINT_SCRIPT) | $(LINT_OUTPUT_DIR)
 	@ mkdir -p $(dir $@)
 	@ python $(LINT_SCRIPT) $< 2>&1 \
 		| grep -v "^Done processing " \
 		| grep -v "^Total errors found: 0" \
 		> $@ \
 		|| true

 test: $(TEST_ALL_BIN) $(TEST_ALL_DYNLINK_BIN) $(TEST_BINS)

 tools: $(TOOL_BINS) $(TOOL_BIN_LINKS)

 examples: $(EXAMPLE_BINS)

 py$(PROJECT): py

 py: $(PY$(PROJECT)_SO) $(PROTO_GEN_PY)

 $(PY$(PROJECT)_SO): $(PY$(PROJECT)_SRC) $(PY$(PROJECT)_HXX) | $(DYNAMIC_NAME)
 	@ echo CXX/LD -o $@ $<
 	$(Q)$(CXX) -shared -o $@ $(PY$(PROJECT)_SRC) \
 		-o $@ $(LINKFLAGS) -l$(LIBRARY_NAME) $(PYTHON_LDFLAGS) \
 		-Wl,-rpath,$(ORIGIN)/../../build/lib

 mat$(PROJECT): mat

 mat: $(MAT$(PROJECT)_SO)

 $(MAT$(PROJECT)_SO): $(MAT$(PROJECT)_SRC) $(STATIC_NAME)
 	@ if [ -z "$(MATLAB_DIR)" ]; then \
 		echo "MATLAB_DIR must be specified in $(CONFIG_FILE)" \
 			"to build mat$(PROJECT)."; \
 		exit 1; \
 	fi
 	@ echo MEX $<
 	$(Q)$(MATLAB_DIR)/bin/mex $(MAT$(PROJECT)_SRC) \
 			CXX="$(CXX)" \
 			CXXFLAGS="\$$CXXFLAGS $(MATLAB_CXXFLAGS)" \
 			CXXLIBS="\$$CXXLIBS $(STATIC_LINK_COMMAND) $(LDFLAGS)" -output $@
 	@ if [ -f "$(PROJECT)_.d" ]; then \
 		mv -f $(PROJECT)_.d $(BUILD_DIR)/${MAT$(PROJECT)_SO:.$(MAT_SO_EXT)=.d}; \
 	fi

 runtest: $(TEST_ALL_BIN)
 	$(TOOL_BUILD_DIR)/caffe
 	$(TEST_ALL_BIN) $(TEST_GPUID) --gtest_shuffle $(TEST_FILTER)

 pytest: py
 	cd python; python -m unittest discover -s caffe/test

 mattest: mat
 	cd matlab; $(MATLAB_DIR)/bin/matlab -nodisplay -r 'caffe.run_tests(), exit()'

 warn: $(EMPTY_WARN_REPORT)

 $(EMPTY_WARN_REPORT): $(ALL_WARNS) | $(BUILD_DIR)
 	@ cat $(ALL_WARNS) > $@
 	@ if [ -s "$@" ]; then \
 		cat $@; \
 		mv $@ $(NONEMPTY_WARN_REPORT); \
 		echo "Compiler produced one or more warnings."; \
 		exit 1; \
 	  fi; \
 	  $(RM) $(NONEMPTY_WARN_REPORT); \
 	  echo "No compiler warnings!";

 $(ALL_WARNS): %.o.$(WARNS_EXT) : %.o

 $(BUILD_DIR_LINK): $(BUILD_DIR)/.linked

 # Create a target ".linked" in this BUILD_DIR to tell Make that the "build" link
 # is currently correct, then delete the one in the OTHER_BUILD_DIR in case it
 # exists and $(DEBUG) is toggled later.
 $(BUILD_DIR)/.linked:
 	@ mkdir -p $(BUILD_DIR)
 	@ $(RM) $(OTHER_BUILD_DIR)/.linked
 	@ $(RM) -r $(BUILD_DIR_LINK)
 	@ ln -s $(BUILD_DIR) $(BUILD_DIR_LINK)
 	@ touch $@

 $(ALL_BUILD_DIRS): | $(BUILD_DIR_LINK)
 	@ mkdir -p $@

 $(DYNAMIC_NAME): $(OBJS) | $(LIB_BUILD_DIR)
 	@ echo LD -o $@
 	$(Q)$(CXX) -shared -o $@ $(OBJS) $(VERSIONFLAGS) $(LINKFLAGS) $(LDFLAGS)
 	@ cd $(BUILD_DIR)/lib; rm -f $(DYNAMIC_NAME_SHORT);   ln -s $(DYNAMIC_VERSIONED_NAME_SHORT) $(DYNAMIC_NAME_SHORT)

 $(STATIC_NAME): $(OBJS) | $(LIB_BUILD_DIR)
 	@ echo AR -o $@
 	$(Q)ar rcs $@ $(OBJS)

 $(BUILD_DIR)/%.o: %.cpp | $(ALL_BUILD_DIRS)
 	@ echo CXX $<
 	$(Q)$(CXX) $< $(CXXFLAGS) -c -o $@ 2> $@.$(WARNS_EXT) \
 		|| (cat $@.$(WARNS_EXT); exit 1)
 	@ cat $@.$(WARNS_EXT)

 $(PROTO_BUILD_DIR)/%.pb.o: $(PROTO_BUILD_DIR)/%.pb.cc $(PROTO_GEN_HEADER) \
 		| $(PROTO_BUILD_DIR)
 	@ echo CXX $<
 	$(Q)$(CXX) $< $(CXXFLAGS) -c -o $@ 2> $@.$(WARNS_EXT) \
 		|| (cat $@.$(WARNS_EXT); exit 1)
 	@ cat $@.$(WARNS_EXT)

 $(BUILD_DIR)/cuda/%.o: %.cu | $(ALL_BUILD_DIRS)
 	@ echo NVCC $<
 	$(Q)$(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} \
 		-odir $(@D)
 	$(Q)$(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ 2> $@.$(WARNS_EXT) \
 		|| (cat $@.$(WARNS_EXT); exit 1)
 	@ cat $@.$(WARNS_EXT)

 $(TEST_ALL_BIN): $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) \
 		| $(DYNAMIC_NAME) $(TEST_BIN_DIR)
 	@ echo CXX/LD -o $@ $<
 	$(Q)$(CXX) $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) \
 		-o $@ $(LINKFLAGS) $(LDFLAGS) -l$(LIBRARY_NAME) -Wl,-rpath,$(ORIGIN)/../lib

 $(TEST_CU_BINS): $(TEST_BIN_DIR)/%.testbin: $(TEST_CU_BUILD_DIR)/%.o \
 	$(GTEST_OBJ) | $(DYNAMIC_NAME) $(TEST_BIN_DIR)
 	@ echo LD $<
 	$(Q)$(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) \
 		-o $@ $(LINKFLAGS) $(LDFLAGS) -l$(LIBRARY_NAME) -Wl,-rpath,$(ORIGIN)/../lib

 $(TEST_CXX_BINS): $(TEST_BIN_DIR)/%.testbin: $(TEST_CXX_BUILD_DIR)/%.o \
 	$(GTEST_OBJ) | $(DYNAMIC_NAME) $(TEST_BIN_DIR)
 	@ echo LD $<
 	$(Q)$(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) \
 		-o $@ $(LINKFLAGS) $(LDFLAGS) -l$(LIBRARY_NAME) -Wl,-rpath,$(ORIGIN)/../lib

 # Target for extension-less symlinks to tool binaries with extension '*.bin'.
 $(TOOL_BUILD_DIR)/%: $(TOOL_BUILD_DIR)/%.bin | $(TOOL_BUILD_DIR)
 	@ $(RM) $@
 	@ ln -s $(notdir $<) $@

 $(TOOL_BINS): %.bin : %.o | $(DYNAMIC_NAME)
 	@ echo CXX/LD -o $@
 	$(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(LIBRARY_NAME) $(LDFLAGS) \
 		-Wl,-rpath,$(ORIGIN)/../lib

 $(EXAMPLE_BINS): %.bin : %.o | $(DYNAMIC_NAME)
 	@ echo CXX/LD -o $@
 	$(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(LIBRARY_NAME) $(LDFLAGS) \
 		-Wl,-rpath,$(ORIGIN)/../../lib

 proto: $(PROTO_GEN_CC) $(PROTO_GEN_HEADER)

 $(PROTO_BUILD_DIR)/%.pb.cc $(PROTO_BUILD_DIR)/%.pb.h : \
 		$(PROTO_SRC_DIR)/%.proto | $(PROTO_BUILD_DIR)
 	@ echo PROTOC $<
 	$(Q)protoc --proto_path=$(PROTO_SRC_DIR) --cpp_out=$(PROTO_BUILD_DIR) $<

 $(PY_PROTO_BUILD_DIR)/%_pb2.py : $(PROTO_SRC_DIR)/%.proto \
 		$(PY_PROTO_INIT) | $(PY_PROTO_BUILD_DIR)
 	@ echo PROTOC \(python\) $<
 	$(Q)protoc --proto_path=$(PROTO_SRC_DIR) --python_out=$(PY_PROTO_BUILD_DIR) $<

 $(PY_PROTO_INIT): | $(PY_PROTO_BUILD_DIR)
 	touch $(PY_PROTO_INIT)

 clean:
 	@- $(RM) -rf $(ALL_BUILD_DIRS)
 	@- $(RM) -rf $(OTHER_BUILD_DIR)
 	@- $(RM) -rf $(BUILD_DIR_LINK)
 	@- $(RM) -rf $(DISTRIBUTE_DIR)
 	@- $(RM) $(PY$(PROJECT)_SO)
 	@- $(RM) $(MAT$(PROJECT)_SO)

 supercleanfiles:
 	$(eval SUPERCLEAN_FILES := $(strip \
 			$(foreach ext,$(SUPERCLEAN_EXTS), $(shell find . -name '*$(ext)' \
 			-not -path './data/*'))))

 supercleanlist: supercleanfiles
 	@ \
 	if [ -z "$(SUPERCLEAN_FILES)" ]; then \
 		echo "No generated files found."; \
 	else \
 		echo $(SUPERCLEAN_FILES) | tr ' ' '\n'; \
 	fi

 superclean: clean supercleanfiles
 	@ \
 	if [ -z "$(SUPERCLEAN_FILES)" ]; then \
 		echo "No generated files found."; \
 	else \
 		echo "Deleting the following generated files:"; \
 		echo $(SUPERCLEAN_FILES) | tr ' ' '\n'; \
 		$(RM) $(SUPERCLEAN_FILES); \
 	fi

 $(DIST_ALIASES): $(DISTRIBUTE_DIR)

 $(DISTRIBUTE_DIR): all py | $(DISTRIBUTE_SUBDIRS)
 	# add proto
 	cp -r src/caffe/proto $(DISTRIBUTE_DIR)/
 	# add include
 	cp -r include $(DISTRIBUTE_DIR)/
 	mkdir -p $(DISTRIBUTE_DIR)/include/caffe/proto
 	cp $(PROTO_GEN_HEADER_SRCS) $(DISTRIBUTE_DIR)/include/caffe/proto
 	# add tool and example binaries
 	cp $(TOOL_BINS) $(DISTRIBUTE_DIR)/bin
 	cp $(EXAMPLE_BINS) $(DISTRIBUTE_DIR)/bin
 	# add libraries
 	cp $(STATIC_NAME) $(DISTRIBUTE_DIR)/lib
 	install -m 644 $(DYNAMIC_NAME) $(DISTRIBUTE_DIR)/lib
 	cd $(DISTRIBUTE_DIR)/lib; rm -f $(DYNAMIC_NAME_SHORT);   ln -s $(DYNAMIC_VERSIONED_NAME_SHORT) $(DYNAMIC_NAME_SHORT)
 	# add python - it's not the standard way, indeed...
 	cp -r python $(DISTRIBUTE_DIR)/python

 -include $(DEPS)
diff --git a/Makefile.config b/Makefile.config
 ## Refer to http://caffe.berkeleyvision.org/installation.html
 # Contributions simplifying and improving our build system are welcome!

 # cuDNN acceleration switch (uncomment to build with cuDNN).
 USE_CUDNN := 1

 # CPU-only switch (uncomment to build without GPU support).
 # CPU_ONLY := 1

 # uncomment to disable IO dependencies and corresponding data layers
 # USE_OPENCV := 0
 # USE_LEVELDB := 0
 # USE_LMDB := 0

 # uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary)
 #	You should not set this flag if you will be reading LMDBs with any
 #	possibility of simultaneous read and write
 # ALLOW_LMDB_NOLOCK := 1

 # Uncomment if you're using OpenCV 3
 # OPENCV_VERSION := 3

 # To customize your choice of compiler, uncomment and set the following.
 # N.B. the default for Linux is g++ and the default for OSX is clang++
 # CUSTOM_CXX := g++

 # CUDA directory contains bin/ and lib/ directories that we need.
 CUDA_DIR := /usr/local/cuda
 # On Ubuntu 14.04, if cuda tools are installed via
 # "sudo apt-get install nvidia-cuda-toolkit" then use this instead:
 # CUDA_DIR := /usr

 # CUDA architecture setting: going with all of them.
 # For CUDA < 6.0, comment the *_50 through *_61 lines for compatibility.
 # For CUDA < 8.0, comment the *_60 and *_61 lines for compatibility.
 CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
 		-gencode arch=compute_20,code=sm_21 \
 		-gencode arch=compute_30,code=sm_30 \
 		-gencode arch=compute_35,code=sm_35 \
 		-gencode arch=compute_50,code=sm_50 \
 		-gencode arch=compute_52,code=sm_52 \
 		-gencode arch=compute_60,code=sm_60 \
 		-gencode arch=compute_61,code=sm_61 \
 		-gencode arch=compute_61,code=compute_61

 # BLAS choice:
 # atlas for ATLAS (default)
 # mkl for MKL
 # open for OpenBlas
 BLAS := atlas
 # Custom (MKL/ATLAS/OpenBLAS) include and lib directories.
 # Leave commented to accept the defaults for your choice of BLAS
 # (which should work)!
 # BLAS_INCLUDE := /path/to/your/blas
 # BLAS_LIB := /path/to/your/blas

 # Homebrew puts openblas in a directory that is not on the standard search path
 # BLAS_INCLUDE := $(shell brew --prefix openblas)/include
 # BLAS_LIB := $(shell brew --prefix openblas)/lib

 # This is required only if you will compile the matlab interface.
 # MATLAB directory should contain the mex binary in /bin.
 # MATLAB_DIR := /usr/local
 # MATLAB_DIR := /Applications/MATLAB_R2012b.app

 # NOTE: this is required only if you will compile the python interface.
 # We need to be able to find Python.h and numpy/arrayobject.h.
 PYTHON_INCLUDE := /usr/include/python2.7 \
 		/usr/lib/python2.7/dist-packages/numpy/core/include
 # Anaconda Python distribution is quite popular. Include path:
 # Verify anaconda location, sometimes it's in root.
 # ANACONDA_HOME := $(HOME)/anaconda
 # PYTHON_INCLUDE := $(ANACONDA_HOME)/include \
 		# $(ANACONDA_HOME)/include/python2.7 \
 		# $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include

 # Uncomment to use Python 3 (default is Python 2)
 # PYTHON_LIBRARIES := boost_python3 python3.5m
 # PYTHON_INCLUDE := /usr/include/python3.5m \
 #                 /usr/lib/python3.5/dist-packages/numpy/core/include

 # We need to be able to find libpythonX.X.so or .dylib.
 PYTHON_LIB := /usr/lib
 # PYTHON_LIB := $(ANACONDA_HOME)/lib

 # Homebrew installs numpy in a non standard path (keg only)
 # PYTHON_INCLUDE += $(dir $(shell python -c 'import numpy.core; print(numpy.core.__file__)'))/include
 # PYTHON_LIB += $(shell brew --prefix numpy)/lib

 # Uncomment to support layers written in Python (will link against Python libs)
 WITH_PYTHON_LAYER := 1

 # Whatever else you find you need goes here.
 # INCLUDE_DIRS := $(PYTHON_INCLUDE) $(HOME)/opencv_3.2.0/include /usr/local/include /usr/include/hdf5/serial
 # LIBRARY_DIRS := $(PYTHON_LIB) $(HOME)/opencv_3.2.0/lib /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/hdf5/serial
 INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial $(HOME)/restclient-cpp/build/include/
 LIBRARY_DIRS := $(PYTHON_LIB)  /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/hdf5/serial $(HOME)/restclient-cpp/build/lib/

 # If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies
 # INCLUDE_DIRS += $(shell brew --prefix)/include
 # LIBRARY_DIRS += $(shell brew --prefix)/lib

 # NCCL acceleration switch (uncomment to build with NCCL)
 # https://github.com/NVIDIA/nccl (last tested version: v1.2.3-1+cuda8.0)
 # USE_NCCL := 1

 # Uncomment to use `pkg-config` to specify OpenCV library paths.
 # (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.)
 # USE_PKG_CONFIG := 1

 # N.B. both build and distribute dirs are cleared on `make clean`
 BUILD_DIR := build
 DISTRIBUTE_DIR := distribute

 # Uncomment for debugging. Does not work on OSX due to https://github.com/BVLC/caffe/issues/171
 # DEBUG := 1

 # The ID of the GPU that 'make runtest' will use to run unit tests.
 TEST_GPUID := 0

 # enable pretty build (comment to see full commands)
 Q ?= @

 # If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies
 # INCLUDE_DIRS += $(shell brew --prefix)/include
 # LIBRARY_DIRS += $(shell brew --prefix)/lib

 # NCCL acceleration switch (uncomment to build with NCCL)
 # https://github.com/NVIDIA/nccl (last tested version: v1.2.3-1+cuda8.0)
 # USE_NCCL := 1

 # Uncomment to use `pkg-config` to specify OpenCV library paths.
 # (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.)
 # USE_PKG_CONFIG := 1

 # N.B. both build and distribute dirs are cleared on `make clean`
 BUILD_DIR := build
 DISTRIBUTE_DIR := distribute

 # Uncomment for debugging. Does not work on OSX due to https://github.com/BVLC/caffe/issues/171
 # DEBUG := 1

 # The ID of the GPU that 'make runtest' will use to run unit tests.
 TEST_GPUID := 0

 # enable pretty build (comment to see full commands)
 Q ?= @
diff --git a/solver.cpp b/solver.cpp
 #include <cstdio>

 #include <string>
 #include <vector>

 #include "restclient-cpp/restclient.h"
 #include "restclient-cpp/connection.h"
 #include "caffe/solver.hpp"
 #include "caffe/util/format.hpp"
 #include "caffe/util/hdf5.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/upgrade_proto.hpp"

 namespace patch
 {
    template < typename T > std::string to_string( const T& n )
    {
        std::ostringstream stm ;
        stm << n ;
        return stm.str() ;
    }
 }

 namespace caffe {

 template<typename Dtype>
 void Solver<Dtype>::SetActionFunction(ActionCallback func) {
  action_request_function_ = func;
 }

 template<typename Dtype>
 SolverAction::Enum Solver<Dtype>::GetRequestedAction() {
  if (action_request_function_) {
    // If the external request function has been set, call it.
    return action_request_function_();
  }
  return SolverAction::NONE;
 }

 template <typename Dtype>
 Solver<Dtype>::Solver(const SolverParameter& param)
    : net_(), callbacks_(), requested_early_exit_(false) {
  Init(param);
 }

 template <typename Dtype>
 Solver<Dtype>::Solver(const string& param_file)
    : net_(), callbacks_(), requested_early_exit_(false) {
  SolverParameter param;
  ReadSolverParamsFromTextFileOrDie(param_file, &param);
  Init(param);
 }

 template <typename Dtype>
 void Solver<Dtype>::Init(const SolverParameter& param) {
  LOG(INFO) << "init" << param.DebugString();
  LOG_IF(INFO, Caffe::root_solver()) << "Initializing solver from parameters: "
    << std::endl << param.DebugString();
  LOG(INFO) << "init done";
  param_ = param;
  CHECK_GE(param_.average_loss(), 1) << "average_loss should be non-negative.";
  CheckSnapshotWritePermissions();
  if (param_.random_seed() >= 0) {
    Caffe::set_random_seed(param_.random_seed() + Caffe::solver_rank());
  }
  // Scaffolding code
  InitTrainNet();
  InitTestNets();
  if (Caffe::root_solver()) {
    LOG(INFO) << "Solver scaffolding done.";
  }
  iter_ = 0;
  current_step_ = 0;
 }

 template <typename Dtype>
 void Solver<Dtype>::InitTrainNet() {
  const int num_train_nets = param_.has_net() + param_.has_net_param() +
      param_.has_train_net() + param_.has_train_net_param();
  const string& field_names = "net, net_param, train_net, train_net_param";
  CHECK_GE(num_train_nets, 1) << "SolverParameter must specify a train net "
      << "using one of these fields: " << field_names;
  CHECK_LE(num_train_nets, 1) << "SolverParameter must not contain more than "
      << "one of these fields specifying a train_net: " << field_names;
  NetParameter net_param;
  if (param_.has_train_net_param()) {
    LOG_IF(INFO, Caffe::root_solver())
        << "Creating training net specified in train_net_param.";
    net_param.CopyFrom(param_.train_net_param());
  } else if (param_.has_train_net()) {
    LOG_IF(INFO, Caffe::root_solver())
        << "Creating training net from train_net file: " << param_.train_net();
    ReadNetParamsFromTextFileOrDie(param_.train_net(), &net_param);
  }
  if (param_.has_net_param()) {
    LOG_IF(INFO, Caffe::root_solver())
        << "Creating training net specified in net_param.";
    net_param.CopyFrom(param_.net_param());
  }
  if (param_.has_net()) {
    LOG_IF(INFO, Caffe::root_solver())
        << "Creating training net from net file: " << param_.net();
    ReadNetParamsFromTextFileOrDie(param_.net(), &net_param);
  }
  // Set the correct NetState.  We start with the solver defaults (lowest
  // precedence); then, merge in any NetState specified by the net_param itself;
  // finally, merge in any NetState specified by the train_state (highest
  // precedence).
  NetState net_state;
  net_state.set_phase(TRAIN);
  net_state.MergeFrom(net_param.state());
  net_state.MergeFrom(param_.train_state());
  net_param.mutable_state()->CopyFrom(net_state);
  net_.reset(new Net<Dtype>(net_param));
 }

 template <typename Dtype>
 void Solver<Dtype>::InitTestNets() {
  const bool has_net_param = param_.has_net_param();
  const bool has_net_file = param_.has_net();
  const int num_generic_nets = has_net_param + has_net_file;
  CHECK_LE(num_generic_nets, 1)
      << "Both net_param and net_file may not be specified.";
  const int num_test_net_params = param_.test_net_param_size();
  const int num_test_net_files = param_.test_net_size();
  const int num_test_nets = num_test_net_params + num_test_net_files;
  if (num_generic_nets) {
      CHECK_GE(param_.test_iter_size(), num_test_nets)
          << "test_iter must be specified for each test network.";
  } else {
      CHECK_EQ(param_.test_iter_size(), num_test_nets)
          << "test_iter must be specified for each test network.";
  }
  // If we have a generic net (specified by net or net_param, rather than
  // test_net or test_net_param), we may have an unlimited number of actual
  // test networks -- the actual number is given by the number of remaining
  // test_iters after any test nets specified by test_net_param and/or test_net
  // are evaluated.
  const int num_generic_net_instances = param_.test_iter_size() - num_test_nets;
  const int num_test_net_instances = num_test_nets + num_generic_net_instances;
  if (param_.test_state_size()) {
    CHECK_EQ(param_.test_state_size(), num_test_net_instances)
        << "test_state must be unspecified or specified once per test net.";
  }
  if (num_test_net_instances) {
    CHECK_GT(param_.test_interval(), 0);
  }
  int test_net_id = 0;
  vector<string> sources(num_test_net_instances);
  vector<NetParameter> net_params(num_test_net_instances);
  for (int i = 0; i < num_test_net_params; ++i, ++test_net_id) {
      sources[test_net_id] = "test_net_param";
      net_params[test_net_id].CopyFrom(param_.test_net_param(i));
  }
  for (int i = 0; i < num_test_net_files; ++i, ++test_net_id) {
      sources[test_net_id] = "test_net file: " + param_.test_net(i);
      ReadNetParamsFromTextFileOrDie(param_.test_net(i),
          &net_params[test_net_id]);
  }
  const int remaining_test_nets = param_.test_iter_size() - test_net_id;
  if (has_net_param) {
    for (int i = 0; i < remaining_test_nets; ++i, ++test_net_id) {
      sources[test_net_id] = "net_param";
      net_params[test_net_id].CopyFrom(param_.net_param());
    }
  }
  if (has_net_file) {
    for (int i = 0; i < remaining_test_nets; ++i, ++test_net_id) {
      sources[test_net_id] = "net file: " + param_.net();
      ReadNetParamsFromTextFileOrDie(param_.net(), &net_params[test_net_id]);
    }
  }
  test_nets_.resize(num_test_net_instances);
  for (int i = 0; i < num_test_net_instances; ++i) {
    // Set the correct NetState.  We start with the solver defaults (lowest
    // precedence); then, merge in any NetState specified by the net_param
    // itself; finally, merge in any NetState specified by the test_state
    // (highest precedence).
    NetState net_state;
    net_state.set_phase(TEST);
    net_state.MergeFrom(net_params[i].state());
    if (param_.test_state_size()) {
      net_state.MergeFrom(param_.test_state(i));
    }
    net_params[i].mutable_state()->CopyFrom(net_state);
    LOG(INFO)
        << "Creating test net (#" << i << ") specified by " << sources[i];
    test_nets_[i].reset(new Net<Dtype>(net_params[i]));
    test_nets_[i]->set_debug_info(param_.debug_info());
  }
 }

 template <typename Dtype>
 void Solver<Dtype>::Step(int iters) {
  const int start_iter = iter_;
  const int stop_iter = iter_ + iters;
  int average_loss = this->param_.average_loss();
  losses_.clear();
  smoothed_loss_ = 0;
  iteration_timer_.Start();
  string url = "";

  // initialize RestClient
  CHECK_EQ(RestClient::init(), 0) << "libcurl init error";

  int upload_iters = (param_.has_upload_iters() == true) ? param_.upload_iters(): 0;

  if (upload_iters) {
    CHECK(param_.has_upload_hostname() && param_.has_upload_port() && param_.has_exp_name());
    url = "http://" + param_.upload_hostname() + ":" + param_.upload_port();
    // check if server is up
    conn = new RestClient::Connection(url);
    // RestClient::Response upResp = conn->get(url);
    // CHECK_EQ(upResp.code, 200) << "server is not up";
    // FIXME: this should be in the headers
    RestClient::HeaderFields headers;
    // headers["Accept"] = "application/json";
    headers["Content-Type"] = "application/json";
    conn->SetHeaders(headers);
    // conn->AppendHeader("Content-Type", "application/json");
    string exp_name = "\"" + param_.exp_name() + "\"";
    LOG(INFO) << exp_name;
    RestClient::Response createResp = conn->post("/data", "\"xxx\"");
    CHECK_EQ(createResp.code, 200) << "Create " << param_.exp_name() << " Failed: " << createResp.body;
    LOG(INFO) << "parse ";
  }


  while (iter_ < stop_iter) {
    // zero-init the params
    net_->ClearParamDiffs();
    if (param_.test_interval() && iter_ % param_.test_interval() == 0
        && (iter_ > 0 || param_.test_initialization())) {
      if (Caffe::root_solver()) {
        TestAll();
      }
      if (requested_early_exit_) {
        // Break out of the while loop because stop was requested while testing.
        break;
      }
    }

    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_start();
    }
    const bool display = param_.display() && iter_ % param_.display() == 0;
    net_->set_debug_info(display && param_.debug_info());
    // accumulate the loss and gradient
    Dtype loss = 0;
    for (int i = 0; i < param_.iter_size(); ++i) {
      loss += net_->ForwardBackward();
    }
    loss /= param_.iter_size();
    // average the loss across iterations for smoothed reporting
    UpdateSmoothedLoss(loss, start_iter, average_loss);
    if (upload_iters) {
      if (iter_ % upload_iters == 0) {
        string post = "/data/scalars?xp=" + param_.exp_name() + "&name=SmoothLoss";
        string data = "'[-1,-1," + patch::to_string(smoothed_loss_) + "]'";
        LOG(INFO) << data;
        RestClient::Response r = conn->post(url + post, data);
        CHECK_EQ(r.code, 200) << "Upload " << param_.exp_name() << " Failed";
      }
    }
    if (display) {
      float lapse = iteration_timer_.Seconds();
      float per_s = (iter_ - iterations_last_) / (lapse ? lapse : 1);
      LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_
          << " (" << per_s << " iter/s, " << lapse << "s/"
          << param_.display() << " iters), loss = " << smoothed_loss_;
      iteration_timer_.Start();
      iterations_last_ = iter_;
      const vector<Blob<Dtype>*>& result = net_->output_blobs();
      int score_index = 0;
      for (int j = 0; j < result.size(); ++j) {
        const Dtype* result_vec = result[j]->cpu_data();
        const string& output_name =
            net_->blob_names()[net_->output_blob_indices()[j]];
        const Dtype loss_weight =
            net_->blob_loss_weights()[net_->output_blob_indices()[j]];
        for (int k = 0; k < result[j]->count(); ++k) {
          ostringstream loss_msg_stream;
          if (loss_weight) {
            loss_msg_stream << " (* " << loss_weight
                            << " = " << loss_weight * result_vec[k] << " loss)";
          }
          LOG_IF(INFO, Caffe::root_solver()) << "    Train net output #"
              << score_index++ << ": " << output_name << " = "
              << result_vec[k] << loss_msg_stream.str();
        }
      }
    }
    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_gradients_ready();
    }
    ApplyUpdate();

    // Increment the internal iter_ counter -- its value should always indicate
    // the number of times the weights have been updated.
    ++iter_;

    SolverAction::Enum request = GetRequestedAction();

    // Save a snapshot if needed.
    if ((param_.snapshot()
         && iter_ % param_.snapshot() == 0
         && Caffe::root_solver()) ||
         (request == SolverAction::SNAPSHOT)) {
      Snapshot();
    }
    if (SolverAction::STOP == request) {
      requested_early_exit_ = true;
      // Break out of training loop.
      break;
    }
  }
 }

 template <typename Dtype>
 void Solver<Dtype>::Solve(const char* resume_file) {
  CHECK(Caffe::root_solver());
  LOG(INFO) << "Solving " << net_->name();
  LOG(INFO) << "Learning Rate Policy: " << param_.lr_policy();

  // Initialize to false every time we start solving.
  requested_early_exit_ = false;

  if (resume_file) {
    LOG(INFO) << "Restoring previous solver status from " << resume_file;
    Restore(resume_file);
  }

  // For a network that is trained by the solver, no bottom or top vecs
  // should be given, and we will just provide dummy vecs.
  int start_iter = iter_;
  Step(param_.max_iter() - iter_);
  // If we haven't already, save a snapshot after optimization, unless
  // overridden by setting snapshot_after_train := false
  if (param_.snapshot_after_train()
      && (!param_.snapshot() || iter_ % param_.snapshot() != 0)) {
    Snapshot();
  }
  if (requested_early_exit_) {
    LOG(INFO) << "Optimization stopped early.";
    return;
  }
  // After the optimization is done, run an additional train and test pass to
  // display the train and test loss/outputs if appropriate (based on the
  // display and test_interval settings, respectively).  Unlike in the rest of
  // training, for the train net we only run a forward pass as we've already
  // updated the parameters "max_iter" times -- this final pass is only done to
  // display the loss, which is computed in the forward pass.
  if (param_.display() && iter_ % param_.display() == 0) {
    int average_loss = this->param_.average_loss();
    Dtype loss;
    net_->Forward(&loss);

    UpdateSmoothedLoss(loss, start_iter, average_loss);

    LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss_;
  }
  if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
    TestAll();
  }
  LOG(INFO) << "Optimization Done.";
 }

 template <typename Dtype>
 void Solver<Dtype>::TestAll() {
  for (int test_net_id = 0;
       test_net_id < test_nets_.size() && !requested_early_exit_;
       ++test_net_id) {
    Test(test_net_id);
  }
 }

 template <typename Dtype>
 void Solver<Dtype>::Test(const int test_net_id) {
  CHECK(Caffe::root_solver());
  LOG(INFO) << "Iteration " << iter_
            << ", Testing net (#" << test_net_id << ")";
  CHECK_NOTNULL(test_nets_[test_net_id].get())->
      ShareTrainedLayersWith(net_.get());
  vector<Dtype> test_score;
  vector<int> test_score_output_id;
  const shared_ptr<Net<Dtype> >& test_net = test_nets_[test_net_id];
  Dtype loss = 0;
  for (int i = 0; i < param_.test_iter(test_net_id); ++i) {
    SolverAction::Enum request = GetRequestedAction();
    // Check to see if stoppage of testing/training has been requested.
    while (request != SolverAction::NONE) {
        if (SolverAction::SNAPSHOT == request) {
          Snapshot();
        } else if (SolverAction::STOP == request) {
          requested_early_exit_ = true;
        }
        request = GetRequestedAction();
    }
    if (requested_early_exit_) {
      // break out of test loop.
      break;
    }

    Dtype iter_loss;
    const vector<Blob<Dtype>*>& result =
        test_net->Forward(&iter_loss);
    if (param_.test_compute_loss()) {
      loss += iter_loss;
    }
    if (i == 0) {
      for (int j = 0; j < result.size(); ++j) {
        const Dtype* result_vec = result[j]->cpu_data();
        for (int k = 0; k < result[j]->count(); ++k) {
          test_score.push_back(result_vec[k]);
          test_score_output_id.push_back(j);
        }
      }
    } else {
      int idx = 0;
      for (int j = 0; j < result.size(); ++j) {
        const Dtype* result_vec = result[j]->cpu_data();
        for (int k = 0; k < result[j]->count(); ++k) {
          test_score[idx++] += result_vec[k];
        }
      }
    }
  }
  if (requested_early_exit_) {
    LOG(INFO)     << "Test interrupted.";
    return;
  }
  if (param_.test_compute_loss()) {
    loss /= param_.test_iter(test_net_id);
    LOG(INFO) << "Test loss: " << loss;
  }
  for (int i = 0; i < test_score.size(); ++i) {
    const int output_blob_index =
        test_net->output_blob_indices()[test_score_output_id[i]];
    const string& output_name = test_net->blob_names()[output_blob_index];
    const Dtype loss_weight = test_net->blob_loss_weights()[output_blob_index];
    ostringstream loss_msg_stream;
    const Dtype mean_score = test_score[i] / param_.test_iter(test_net_id);
    if (loss_weight) {
      loss_msg_stream << " (* " << loss_weight
                      << " = " << loss_weight * mean_score << " loss)";
    }
    LOG(INFO) << "    Test net output #" << i << ": " << output_name << " = "
              << mean_score << loss_msg_stream.str();
  }
 }

 template <typename Dtype>
 void Solver<Dtype>::Snapshot() {
  CHECK(Caffe::root_solver());
  string model_filename;
  switch (param_.snapshot_format()) {
  case caffe::SolverParameter_SnapshotFormat_BINARYPROTO:
    model_filename = SnapshotToBinaryProto();
    break;
  case caffe::SolverParameter_SnapshotFormat_HDF5:
    model_filename = SnapshotToHDF5();
    break;
  default:
    LOG(FATAL) << "Unsupported snapshot format.";
  }

  SnapshotSolverState(model_filename);
 }

 template <typename Dtype>
 void Solver<Dtype>::CheckSnapshotWritePermissions() {
  if (Caffe::root_solver() && param_.snapshot()) {
    CHECK(param_.has_snapshot_prefix())
        << "In solver params, snapshot is specified but snapshot_prefix is not";
    string probe_filename = SnapshotFilename(".tempfile");
    std::ofstream probe_ofs(probe_filename.c_str());
    if (probe_ofs.good()) {
      probe_ofs.close();
      std::remove(probe_filename.c_str());
    } else {
      LOG(FATAL) << "Cannot write to snapshot prefix '"
          << param_.snapshot_prefix() << "'.  Make sure "
          << "that the directory exists and is writeable.";
    }
  }
 }

 template <typename Dtype>
 string Solver<Dtype>::SnapshotFilename(const string extension) {
  return param_.snapshot_prefix() + "_iter_" + caffe::format_int(iter_)
    + extension;
 }

 template <typename Dtype>
 string Solver<Dtype>::SnapshotToBinaryProto() {
  string model_filename = SnapshotFilename(".caffemodel");
  LOG(INFO) << "Snapshotting to binary proto file " << model_filename;
  NetParameter net_param;
  net_->ToProto(&net_param, param_.snapshot_diff());
  WriteProtoToBinaryFile(net_param, model_filename);
  return model_filename;
 }

 template <typename Dtype>
 string Solver<Dtype>::SnapshotToHDF5() {
  string model_filename = SnapshotFilename(".caffemodel.h5");
  LOG(INFO) << "Snapshotting to HDF5 file " << model_filename;
  net_->ToHDF5(model_filename, param_.snapshot_diff());
  return model_filename;
 }

 template <typename Dtype>
 void Solver<Dtype>::Restore(const char* state_file) {
  string state_filename(state_file);
  if (state_filename.size() >= 3 &&
      state_filename.compare(state_filename.size() - 3, 3, ".h5") == 0) {
    RestoreSolverStateFromHDF5(state_filename);
  } else {
    RestoreSolverStateFromBinaryProto(state_filename);
  }
 }

 template <typename Dtype>
 void Solver<Dtype>::UpdateSmoothedLoss(Dtype loss, int start_iter,
    int average_loss) {
  LOG(INFO) << "start_iter " << start_iter;
  LOG(INFO) << "iter_  " << iter_;
  LOG(INFO) << "losses_ " << losses_.size();
  if (losses_.size() < average_loss) {
    losses_.push_back(loss);
    int size = losses_.size();
    smoothed_loss_ = (smoothed_loss_ * (size - 1) + loss) / size;
  } else {
    int idx = (iter_ - start_iter) % average_loss;
    LOG(INFO) << "idx " << idx;
    smoothed_loss_ += (loss - losses_[idx]) / average_loss;
    losses_[idx] = loss;
  }
 }

 INSTANTIATE_CLASS(Solver);

 }  // namespace caffe
	## Refer to http://caffe.berkeleyvision.org/installation.html
	# Contributions simplifying and improving our build system are welcome!

	# cuDNN acceleration switch (uncomment to build with cuDNN).
	USE_CUDNN := 1

	# CPU-only switch (uncomment to build without GPU support).
	# CPU_ONLY := 1

	# uncomment to disable IO dependencies and corresponding data layers
	# USE_OPENCV := 0
	# USE_LEVELDB := 0
	# USE_LMDB := 0

	# uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary)
	# You should not set this flag if you will be reading LMDBs with any
	# possibility of simultaneous read and write
	# ALLOW_LMDB_NOLOCK := 1

	# Uncomment if you're using OpenCV 3
	# OPENCV_VERSION := 3

	# To customize your choice of compiler, uncomment and set the following.
	# N.B. the default for Linux is g++ and the default for OSX is clang++
	# CUSTOM_CXX := g++

	# CUDA directory contains bin/ and lib/ directories that we need.
	CUDA_DIR := /usr/local/cuda
	# On Ubuntu 14.04, if cuda tools are installed via
	# "sudo apt-get install nvidia-cuda-toolkit" then use this instead:
	# CUDA_DIR := /usr

	# CUDA architecture setting: going with all of them.
	# For CUDA < 6.0, comment the _50 through _61 lines for compatibility.
	# For CUDA < 8.0, comment the _60 and _61 lines for compatibility.
	CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
	-gencode arch=compute_20,code=sm_21 \
	-gencode arch=compute_30,code=sm_30 \
	-gencode arch=compute_35,code=sm_35 \
	-gencode arch=compute_50,code=sm_50 \
	-gencode arch=compute_52,code=sm_52 \
	-gencode arch=compute_60,code=sm_60 \
	-gencode arch=compute_61,code=sm_61 \
	-gencode arch=compute_61,code=compute_61

	# BLAS choice:
	# atlas for ATLAS (default)
	# mkl for MKL
	# open for OpenBlas
	BLAS := atlas
	# Custom (MKL/ATLAS/OpenBLAS) include and lib directories.
	# Leave commented to accept the defaults for your choice of BLAS
	# (which should work)!
	# BLAS_INCLUDE := /path/to/your/blas
	# BLAS_LIB := /path/to/your/blas

	# Homebrew puts openblas in a directory that is not on the standard search path
	# BLAS_INCLUDE := $(shell brew --prefix openblas)/include
	# BLAS_LIB := $(shell brew --prefix openblas)/lib

	# This is required only if you will compile the matlab interface.
	# MATLAB directory should contain the mex binary in /bin.
	# MATLAB_DIR := /usr/local
	# MATLAB_DIR := /Applications/MATLAB_R2012b.app

	# NOTE: this is required only if you will compile the python interface.
	# We need to be able to find Python.h and numpy/arrayobject.h.
	PYTHON_INCLUDE := /usr/include/python2.7 \
	/usr/lib/python2.7/dist-packages/numpy/core/include
	# Anaconda Python distribution is quite popular. Include path:
	# Verify anaconda location, sometimes it's in root.
	# ANACONDA_HOME := $(HOME)/anaconda
	# PYTHON_INCLUDE := $(ANACONDA_HOME)/include \
	# $(ANACONDA_HOME)/include/python2.7 \
	# $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include

	# Uncomment to use Python 3 (default is Python 2)
	# PYTHON_LIBRARIES := boost_python3 python3.5m
	# PYTHON_INCLUDE := /usr/include/python3.5m \
	# /usr/lib/python3.5/dist-packages/numpy/core/include

	# We need to be able to find libpythonX.X.so or .dylib.
	PYTHON_LIB := /usr/lib
	# PYTHON_LIB := $(ANACONDA_HOME)/lib

	# Homebrew installs numpy in a non standard path (keg only)
	# PYTHON_INCLUDE += $(dir $(shell python -c 'import numpy.core; print(numpy.core.__file__)'))/include
	# PYTHON_LIB += $(shell brew --prefix numpy)/lib

	# Uncomment to support layers written in Python (will link against Python libs)
	WITH_PYTHON_LAYER := 1

	# Whatever else you find you need goes here.
	# INCLUDE_DIRS := $(PYTHON_INCLUDE) $(HOME)/opencv_3.2.0/include /usr/local/include /usr/include/hdf5/serial
	# LIBRARY_DIRS := $(PYTHON_LIB) $(HOME)/opencv_3.2.0/lib /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/hdf5/serial
	INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial $(HOME)/restclient-cpp/build/include/
	LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/hdf5/serial $(HOME)/restclient-cpp/build/lib/

	# If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies
	# INCLUDE_DIRS += $(shell brew --prefix)/include
	# LIBRARY_DIRS += $(shell brew --prefix)/lib

	# NCCL acceleration switch (uncomment to build with NCCL)
	# https://github.com/NVIDIA/nccl (last tested version: v1.2.3-1+cuda8.0)
	# USE_NCCL := 1

	# Uncomment to use `pkg-config` to specify OpenCV library paths.
	# (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.)
	# USE_PKG_CONFIG := 1

	# N.B. both build and distribute dirs are cleared on `make clean`
	BUILD_DIR := build
	DISTRIBUTE_DIR := distribute

	# Uncomment for debugging. Does not work on OSX due to https://github.com/BVLC/caffe/issues/171
	# DEBUG := 1

	# The ID of the GPU that 'make runtest' will use to run unit tests.
	TEST_GPUID := 0

	# enable pretty build (comment to see full commands)
	Q ?= @

	# If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies
	# INCLUDE_DIRS += $(shell brew --prefix)/include
	# LIBRARY_DIRS += $(shell brew --prefix)/lib

	# NCCL acceleration switch (uncomment to build with NCCL)
	# https://github.com/NVIDIA/nccl (last tested version: v1.2.3-1+cuda8.0)
	# USE_NCCL := 1

	# Uncomment to use `pkg-config` to specify OpenCV library paths.
	# (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.)
	# USE_PKG_CONFIG := 1

	# N.B. both build and distribute dirs are cleared on `make clean`
	BUILD_DIR := build
	DISTRIBUTE_DIR := distribute

	# Uncomment for debugging. Does not work on OSX due to https://github.com/BVLC/caffe/issues/171
	# DEBUG := 1

	# The ID of the GPU that 'make runtest' will use to run unit tests.
	TEST_GPUID := 0

	# enable pretty build (comment to see full commands)
	Q ?= @
	#include <cstdio>

	#include <string>
	#include <vector>

	#include "restclient-cpp/restclient.h"
	#include "restclient-cpp/connection.h"
	#include "caffe/solver.hpp"
	#include "caffe/util/format.hpp"
	#include "caffe/util/hdf5.hpp"
	#include "caffe/util/io.hpp"
	#include "caffe/util/upgrade_proto.hpp"

	namespace patch
	{
	template < typename T > std::string to_string( const T& n )
	{
	std::ostringstream stm ;
	stm << n ;
	return stm.str() ;
	}
	}

	namespace caffe {

	template<typename Dtype>
	void Solver<Dtype>::SetActionFunction(ActionCallback func) {
	action_request_function_ = func;
	}

	template<typename Dtype>
	SolverAction::Enum Solver<Dtype>::GetRequestedAction() {
	if (action_request_function_) {
	// If the external request function has been set, call it.
	return action_request_function_();
	}
	return SolverAction::NONE;
	}

	template <typename Dtype>
	Solver<Dtype>::Solver(const SolverParameter& param)
	: net_(), callbacks_(), requested_early_exit_(false) {
	Init(param);
	}

	template <typename Dtype>
	Solver<Dtype>::Solver(const string& param_file)
	: net_(), callbacks_(), requested_early_exit_(false) {
	SolverParameter param;
	ReadSolverParamsFromTextFileOrDie(param_file, &param);
	Init(param);
	}

	template <typename Dtype>
	void Solver<Dtype>::Init(const SolverParameter& param) {
	LOG(INFO) << "init" << param.DebugString();
	LOG_IF(INFO, Caffe::root_solver()) << "Initializing solver from parameters: "
	<< std::endl << param.DebugString();
	LOG(INFO) << "init done";
	param_ = param;
	CHECK_GE(param_.average_loss(), 1) << "average_loss should be non-negative.";
	CheckSnapshotWritePermissions();
	if (param_.random_seed() >= 0) {
	Caffe::set_random_seed(param_.random_seed() + Caffe::solver_rank());
	}
	// Scaffolding code
	InitTrainNet();
	InitTestNets();
	if (Caffe::root_solver()) {
	LOG(INFO) << "Solver scaffolding done.";
	}
	iter_ = 0;
	current_step_ = 0;
	}

	template <typename Dtype>
	void Solver<Dtype>::InitTrainNet() {
	const int num_train_nets = param_.has_net() + param_.has_net_param() +
	param_.has_train_net() + param_.has_train_net_param();
	const string& field_names = "net, net_param, train_net, train_net_param";
	CHECK_GE(num_train_nets, 1) << "SolverParameter must specify a train net "
	<< "using one of these fields: " << field_names;
	CHECK_LE(num_train_nets, 1) << "SolverParameter must not contain more than "
	<< "one of these fields specifying a train_net: " << field_names;
	NetParameter net_param;
	if (param_.has_train_net_param()) {
	LOG_IF(INFO, Caffe::root_solver())
	<< "Creating training net specified in train_net_param.";
	net_param.CopyFrom(param_.train_net_param());
	} else if (param_.has_train_net()) {
	LOG_IF(INFO, Caffe::root_solver())
	<< "Creating training net from train_net file: " << param_.train_net();
	ReadNetParamsFromTextFileOrDie(param_.train_net(), &net_param);
	}
	if (param_.has_net_param()) {
	LOG_IF(INFO, Caffe::root_solver())
	<< "Creating training net specified in net_param.";
	net_param.CopyFrom(param_.net_param());
	}
	if (param_.has_net()) {
	LOG_IF(INFO, Caffe::root_solver())
	<< "Creating training net from net file: " << param_.net();
	ReadNetParamsFromTextFileOrDie(param_.net(), &net_param);
	}
	// Set the correct NetState. We start with the solver defaults (lowest
	// precedence); then, merge in any NetState specified by the net_param itself;
	// finally, merge in any NetState specified by the train_state (highest
	// precedence).
	NetState net_state;
	net_state.set_phase(TRAIN);
	net_state.MergeFrom(net_param.state());
	net_state.MergeFrom(param_.train_state());
	net_param.mutable_state()->CopyFrom(net_state);
	net_.reset(new Net<Dtype>(net_param));
	}

	template <typename Dtype>
	void Solver<Dtype>::InitTestNets() {
	const bool has_net_param = param_.has_net_param();
	const bool has_net_file = param_.has_net();
	const int num_generic_nets = has_net_param + has_net_file;
	CHECK_LE(num_generic_nets, 1)
	<< "Both net_param and net_file may not be specified.";
	const int num_test_net_params = param_.test_net_param_size();
	const int num_test_net_files = param_.test_net_size();
	const int num_test_nets = num_test_net_params + num_test_net_files;
	if (num_generic_nets) {
	CHECK_GE(param_.test_iter_size(), num_test_nets)
	<< "test_iter must be specified for each test network.";
	} else {
	CHECK_EQ(param_.test_iter_size(), num_test_nets)
	<< "test_iter must be specified for each test network.";
	}
	// If we have a generic net (specified by net or net_param, rather than
	// test_net or test_net_param), we may have an unlimited number of actual
	// test networks -- the actual number is given by the number of remaining
	// test_iters after any test nets specified by test_net_param and/or test_net
	// are evaluated.
	const int num_generic_net_instances = param_.test_iter_size() - num_test_nets;
	const int num_test_net_instances = num_test_nets + num_generic_net_instances;
	if (param_.test_state_size()) {
	CHECK_EQ(param_.test_state_size(), num_test_net_instances)
	<< "test_state must be unspecified or specified once per test net.";
	}
	if (num_test_net_instances) {
	CHECK_GT(param_.test_interval(), 0);
	}
	int test_net_id = 0;
	vector<string> sources(num_test_net_instances);
	vector<NetParameter> net_params(num_test_net_instances);
	for (int i = 0; i < num_test_net_params; ++i, ++test_net_id) {
	sources[test_net_id] = "test_net_param";
	net_params[test_net_id].CopyFrom(param_.test_net_param(i));
	}
	for (int i = 0; i < num_test_net_files; ++i, ++test_net_id) {
	sources[test_net_id] = "test_net file: " + param_.test_net(i);
	ReadNetParamsFromTextFileOrDie(param_.test_net(i),
	&net_params[test_net_id]);
	}
	const int remaining_test_nets = param_.test_iter_size() - test_net_id;
	if (has_net_param) {
	for (int i = 0; i < remaining_test_nets; ++i, ++test_net_id) {
	sources[test_net_id] = "net_param";
	net_params[test_net_id].CopyFrom(param_.net_param());
	}
	}
	if (has_net_file) {
	for (int i = 0; i < remaining_test_nets; ++i, ++test_net_id) {
	sources[test_net_id] = "net file: " + param_.net();
	ReadNetParamsFromTextFileOrDie(param_.net(), &net_params[test_net_id]);
	}
	}
	test_nets_.resize(num_test_net_instances);
	for (int i = 0; i < num_test_net_instances; ++i) {
	// Set the correct NetState. We start with the solver defaults (lowest
	// precedence); then, merge in any NetState specified by the net_param
	// itself; finally, merge in any NetState specified by the test_state
	// (highest precedence).
	NetState net_state;
	net_state.set_phase(TEST);
	net_state.MergeFrom(net_params[i].state());
	if (param_.test_state_size()) {
	net_state.MergeFrom(param_.test_state(i));
	}
	net_params[i].mutable_state()->CopyFrom(net_state);
	LOG(INFO)
	<< "Creating test net (#" << i << ") specified by " << sources[i];
	test_nets_[i].reset(new Net<Dtype>(net_params[i]));
	test_nets_[i]->set_debug_info(param_.debug_info());
	}
	}

	template <typename Dtype>
	void Solver<Dtype>::Step(int iters) {
	const int start_iter = iter_;
	const int stop_iter = iter_ + iters;
	int average_loss = this->param_.average_loss();
	losses_.clear();
	smoothed_loss_ = 0;
	iteration_timer_.Start();
	string url = "";

	// initialize RestClient
	CHECK_EQ(RestClient::init(), 0) << "libcurl init error";

	int upload_iters = (param_.has_upload_iters() == true) ? param_.upload_iters(): 0;

	if (upload_iters) {
	CHECK(param_.has_upload_hostname() && param_.has_upload_port() && param_.has_exp_name());
	url = "http://" + param_.upload_hostname() + ":" + param_.upload_port();
	// check if server is up
	conn = new RestClient::Connection(url);
	// RestClient::Response upResp = conn->get(url);
	// CHECK_EQ(upResp.code, 200) << "server is not up";
	// FIXME: this should be in the headers
	RestClient::HeaderFields headers;
	// headers["Accept"] = "application/json";
	headers["Content-Type"] = "application/json";
	conn->SetHeaders(headers);
	// conn->AppendHeader("Content-Type", "application/json");
	string exp_name = "\"" + param_.exp_name() + "\"";
	LOG(INFO) << exp_name;
	RestClient::Response createResp = conn->post("/data", "\"xxx\"");
	CHECK_EQ(createResp.code, 200) << "Create " << param_.exp_name() << " Failed: " << createResp.body;
	LOG(INFO) << "parse ";
	}


	while (iter_ < stop_iter) {
	// zero-init the params
	net_->ClearParamDiffs();
	if (param_.test_interval() && iter_ % param_.test_interval() == 0
	&& (iter_ > 0 \|\| param_.test_initialization())) {
	if (Caffe::root_solver()) {
	TestAll();
	}
	if (requested_early_exit_) {
	// Break out of the while loop because stop was requested while testing.
	break;
	}
	}

	for (int i = 0; i < callbacks_.size(); ++i) {
	callbacks_[i]->on_start();
	}
	const bool display = param_.display() && iter_ % param_.display() == 0;
	net_->set_debug_info(display && param_.debug_info());
	// accumulate the loss and gradient
	Dtype loss = 0;
	for (int i = 0; i < param_.iter_size(); ++i) {
	loss += net_->ForwardBackward();
	}
	loss /= param_.iter_size();
	// average the loss across iterations for smoothed reporting
	UpdateSmoothedLoss(loss, start_iter, average_loss);
	if (upload_iters) {
	if (iter_ % upload_iters == 0) {
	string post = "/data/scalars?xp=" + param_.exp_name() + "&name=SmoothLoss";
	string data = "'[-1,-1," + patch::to_string(smoothed_loss_) + "]'";
	LOG(INFO) << data;
	RestClient::Response r = conn->post(url + post, data);
	CHECK_EQ(r.code, 200) << "Upload " << param_.exp_name() << " Failed";
	}
	}
	if (display) {
	float lapse = iteration_timer_.Seconds();
	float per_s = (iter_ - iterations_last_) / (lapse ? lapse : 1);
	LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_
	<< " (" << per_s << " iter/s, " << lapse << "s/"
	<< param_.display() << " iters), loss = " << smoothed_loss_;
	iteration_timer_.Start();
	iterations_last_ = iter_;
	const vector<Blob<Dtype>*>& result = net_->output_blobs();
	int score_index = 0;
	for (int j = 0; j < result.size(); ++j) {
	const Dtype* result_vec = result[j]->cpu_data();
	const string& output_name =
	net_->blob_names()[net_->output_blob_indices()[j]];
	const Dtype loss_weight =
	net_->blob_loss_weights()[net_->output_blob_indices()[j]];
	for (int k = 0; k < result[j]->count(); ++k) {
	ostringstream loss_msg_stream;
	if (loss_weight) {
	loss_msg_stream << " (* " << loss_weight
	<< " = " << loss_weight * result_vec[k] << " loss)";
	}
	LOG_IF(INFO, Caffe::root_solver()) << " Train net output #"
	<< score_index++ << ": " << output_name << " = "
	<< result_vec[k] << loss_msg_stream.str();
	}
	}
	}
	for (int i = 0; i < callbacks_.size(); ++i) {
	callbacks_[i]->on_gradients_ready();
	}
	ApplyUpdate();

	// Increment the internal iter_ counter -- its value should always indicate
	// the number of times the weights have been updated.
	++iter_;

	SolverAction::Enum request = GetRequestedAction();

	// Save a snapshot if needed.
	if ((param_.snapshot()
	&& iter_ % param_.snapshot() == 0
	&& Caffe::root_solver()) \|\|
	(request == SolverAction::SNAPSHOT)) {
	Snapshot();
	}
	if (SolverAction::STOP == request) {
	requested_early_exit_ = true;
	// Break out of training loop.
	break;
	}
	}
	}

	template <typename Dtype>
	void Solver<Dtype>::Solve(const char* resume_file) {
	CHECK(Caffe::root_solver());
	LOG(INFO) << "Solving " << net_->name();
	LOG(INFO) << "Learning Rate Policy: " << param_.lr_policy();

	// Initialize to false every time we start solving.
	requested_early_exit_ = false;

	if (resume_file) {
	LOG(INFO) << "Restoring previous solver status from " << resume_file;
	Restore(resume_file);
	}

	// For a network that is trained by the solver, no bottom or top vecs
	// should be given, and we will just provide dummy vecs.
	int start_iter = iter_;
	Step(param_.max_iter() - iter_);
	// If we haven't already, save a snapshot after optimization, unless
	// overridden by setting snapshot_after_train := false
	if (param_.snapshot_after_train()
	&& (!param_.snapshot() \|\| iter_ % param_.snapshot() != 0)) {
	Snapshot();
	}
	if (requested_early_exit_) {
	LOG(INFO) << "Optimization stopped early.";
	return;
	}
	// After the optimization is done, run an additional train and test pass to
	// display the train and test loss/outputs if appropriate (based on the
	// display and test_interval settings, respectively). Unlike in the rest of
	// training, for the train net we only run a forward pass as we've already
	// updated the parameters "max_iter" times -- this final pass is only done to
	// display the loss, which is computed in the forward pass.
	if (param_.display() && iter_ % param_.display() == 0) {
	int average_loss = this->param_.average_loss();
	Dtype loss;
	net_->Forward(&loss);

	UpdateSmoothedLoss(loss, start_iter, average_loss);

	LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss_;
	}
	if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
	TestAll();
	}
	LOG(INFO) << "Optimization Done.";
	}

	template <typename Dtype>
	void Solver<Dtype>::TestAll() {
	for (int test_net_id = 0;
	test_net_id < test_nets_.size() && !requested_early_exit_;
	++test_net_id) {
	Test(test_net_id);
	}
	}

	template <typename Dtype>
	void Solver<Dtype>::Test(const int test_net_id) {
	CHECK(Caffe::root_solver());
	LOG(INFO) << "Iteration " << iter_
	<< ", Testing net (#" << test_net_id << ")";
	CHECK_NOTNULL(test_nets_[test_net_id].get())->
	ShareTrainedLayersWith(net_.get());
	vector<Dtype> test_score;
	vector<int> test_score_output_id;
	const shared_ptr<Net<Dtype> >& test_net = test_nets_[test_net_id];
	Dtype loss = 0;
	for (int i = 0; i < param_.test_iter(test_net_id); ++i) {
	SolverAction::Enum request = GetRequestedAction();
	// Check to see if stoppage of testing/training has been requested.
	while (request != SolverAction::NONE) {
	if (SolverAction::SNAPSHOT == request) {
	Snapshot();
	} else if (SolverAction::STOP == request) {
	requested_early_exit_ = true;
	}
	request = GetRequestedAction();
	}
	if (requested_early_exit_) {
	// break out of test loop.
	break;
	}

	Dtype iter_loss;
	const vector<Blob<Dtype>*>& result =
	test_net->Forward(&iter_loss);
	if (param_.test_compute_loss()) {
	loss += iter_loss;
	}
	if (i == 0) {
	for (int j = 0; j < result.size(); ++j) {
	const Dtype* result_vec = result[j]->cpu_data();
	for (int k = 0; k < result[j]->count(); ++k) {
	test_score.push_back(result_vec[k]);
	test_score_output_id.push_back(j);
	}
	}
	} else {
	int idx = 0;
	for (int j = 0; j < result.size(); ++j) {
	const Dtype* result_vec = result[j]->cpu_data();
	for (int k = 0; k < result[j]->count(); ++k) {
	test_score[idx++] += result_vec[k];
	}
	}
	}
	}
	if (requested_early_exit_) {
	LOG(INFO) << "Test interrupted.";
	return;
	}
	if (param_.test_compute_loss()) {
	loss /= param_.test_iter(test_net_id);
	LOG(INFO) << "Test loss: " << loss;
	}
	for (int i = 0; i < test_score.size(); ++i) {
	const int output_blob_index =
	test_net->output_blob_indices()[test_score_output_id[i]];
	const string& output_name = test_net->blob_names()[output_blob_index];
	const Dtype loss_weight = test_net->blob_loss_weights()[output_blob_index];
	ostringstream loss_msg_stream;
	const Dtype mean_score = test_score[i] / param_.test_iter(test_net_id);
	if (loss_weight) {
	loss_msg_stream << " (* " << loss_weight
	<< " = " << loss_weight * mean_score << " loss)";
	}
	LOG(INFO) << " Test net output #" << i << ": " << output_name << " = "
	<< mean_score << loss_msg_stream.str();
	}
	}

	template <typename Dtype>
	void Solver<Dtype>::Snapshot() {
	CHECK(Caffe::root_solver());
	string model_filename;
	switch (param_.snapshot_format()) {
	case caffe::SolverParameter_SnapshotFormat_BINARYPROTO:
	model_filename = SnapshotToBinaryProto();
	break;
	case caffe::SolverParameter_SnapshotFormat_HDF5:
	model_filename = SnapshotToHDF5();
	break;
	default:
	LOG(FATAL) << "Unsupported snapshot format.";
	}

	SnapshotSolverState(model_filename);
	}

	template <typename Dtype>
	void Solver<Dtype>::CheckSnapshotWritePermissions() {
	if (Caffe::root_solver() && param_.snapshot()) {
	CHECK(param_.has_snapshot_prefix())
	<< "In solver params, snapshot is specified but snapshot_prefix is not";
	string probe_filename = SnapshotFilename(".tempfile");
	std::ofstream probe_ofs(probe_filename.c_str());
	if (probe_ofs.good()) {
	probe_ofs.close();
	std::remove(probe_filename.c_str());
	} else {
	LOG(FATAL) << "Cannot write to snapshot prefix '"
	<< param_.snapshot_prefix() << "'. Make sure "
	<< "that the directory exists and is writeable.";
	}
	}
	}

	template <typename Dtype>
	string Solver<Dtype>::SnapshotFilename(const string extension) {
	return param_.snapshot_prefix() + "_iter_" + caffe::format_int(iter_)
	+ extension;
	}

	template <typename Dtype>
	string Solver<Dtype>::SnapshotToBinaryProto() {
	string model_filename = SnapshotFilename(".caffemodel");
	LOG(INFO) << "Snapshotting to binary proto file " << model_filename;
	NetParameter net_param;
	net_->ToProto(&net_param, param_.snapshot_diff());
	WriteProtoToBinaryFile(net_param, model_filename);
	return model_filename;
	}

	template <typename Dtype>
	string Solver<Dtype>::SnapshotToHDF5() {
	string model_filename = SnapshotFilename(".caffemodel.h5");
	LOG(INFO) << "Snapshotting to HDF5 file " << model_filename;
	net_->ToHDF5(model_filename, param_.snapshot_diff());
	return model_filename;
	}

	template <typename Dtype>
	void Solver<Dtype>::Restore(const char* state_file) {
	string state_filename(state_file);
	if (state_filename.size() >= 3 &&
	state_filename.compare(state_filename.size() - 3, 3, ".h5") == 0) {
	RestoreSolverStateFromHDF5(state_filename);
	} else {
	RestoreSolverStateFromBinaryProto(state_filename);
	}
	}

	template <typename Dtype>
	void Solver<Dtype>::UpdateSmoothedLoss(Dtype loss, int start_iter,
	int average_loss) {
	LOG(INFO) << "start_iter " << start_iter;
	LOG(INFO) << "iter_ " << iter_;
	LOG(INFO) << "losses_ " << losses_.size();
	if (losses_.size() < average_loss) {
	losses_.push_back(loss);
	int size = losses_.size();
	smoothed_loss_ = (smoothed_loss_ * (size - 1) + loss) / size;
	} else {
	int idx = (iter_ - start_iter) % average_loss;
	LOG(INFO) << "idx " << idx;
	smoothed_loss_ += (loss - losses_[idx]) / average_loss;
	losses_[idx] = loss;
	}
	}

	INSTANTIATE_CLASS(Solver);

	} // namespace caffe