| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package singa; |
| |
| option java_package = "org.apache.singa.proto"; |
/// \file layer.proto is adapted from [Caffe](https://github.com/BVLC/caffe/)'s
/// proto file at commit id c419f8517b1e1b3d7a07fe212fc6c90a70b519ea. We
/// use Caffe's protocol for configuring layer hyper-parameters to make it easy
/// to port Caffe models into SINGA. Specifically, we make the following
/// changes:
/// 1. we rename LayerParameter to LayerConf to differentiate it from model
///    parameters;
/// 2. we rename xxxParameter to xxxConf for the fields of LayerParameter;
/// 3. we comment out some fields (using /*...*/) that are not used by SINGA
///    layers, but reserve their tags;
/// 4. we add new fields (commented like 'singa field...') to support our own
///    functionality.
/// TODO(wangwei) write a proto converter to automatically load Caffe models
/// using Python (or C++/Java).
| |
| // Specifies the shape (dimensions) of a Blob. |
| message BlobShape { |
| repeated int64 dim = 1 [packed = true]; |
| } |
| |
| message BlobProto { |
| optional BlobShape shape = 7; |
| repeated float data = 5 [packed = true]; |
| repeated float diff = 6 [packed = true]; |
| repeated double double_data = 8 [packed = true]; |
| repeated double double_diff = 9 [packed = true]; |
| |
| // 4D dimensions -- deprecated. Use "shape" instead. |
| optional int32 num = 1 [default = 0]; |
| optional int32 channels = 2 [default = 0]; |
| optional int32 height = 3 [default = 0]; |
| optional int32 width = 4 [default = 0]; |
| } |
| |
| message FillerConf { |
| // The filler type, case insensitive |
  optional string type = 1 [default = "constant"];
| optional float value = 2 [default = 0]; // the value in constant filler |
| optional float min = 3 [default = 0]; // the min value in uniform filler |
| optional float max = 4 [default = 1]; // the max value in uniform filler |
| optional float mean = 5 [default = 0]; // the mean value in Gaussian filler |
| optional float std = 6 [default = 1]; // the std value in Gaussian filler |
| // The expected number of non-zero output weights for a given input in |
| // Gaussian filler -- the default -1 means don't perform sparsification. |
| /* optional int32 sparse = 7 [default = -1]; */ |
| // Normalize the filler variance by fan_in, fan_out, or their average. |
| // Applies to 'xavier' and 'msra' fillers. |
| enum VarianceNorm { |
| FAN_IN = 0; |
| FAN_OUT = 1; |
| AVERAGE = 2; |
| } |
| optional VarianceNorm variance_norm = 8 [default = FAN_IN]; |
| } |
| |
| /// SINGA message |
| message OptimizerConf { |
  // The optimizer type, case insensitive
| optional string type = 1 [default = "sgd"]; |
| |
| // used by RMSprop and Adadelta |
| optional float rho = 2 [default = 0.95]; |
| |
  // used by Adam and AdaMax
| optional float beta_1 = 3 [default = 0.9]; |
| optional float beta_2 = 4 [default = 0.999]; |
| |
  // used by vanilla SGD and Nesterov momentum
| optional float momentum = 5 [default = 0.9]; |
| |
  // delta is used to avoid division by zero
| optional float delta = 6 [default = 1e-8]; |
| |
  // global regularizer; lower priority than the ParamSpec regularizer
| optional RegularizerConf regularizer = 10; |
  // global constraint; lower priority than the ParamSpec constraint
| optional ConstraintConf constraint = 11; |
| } |
| |
| message ConstraintConf { |
  // The constraint type (case insensitive), used to limit the parameter
  // value/gradient scale
| optional string type = 1 [default = "l2"]; |
| // e.g., the threshold for limiting the parameter scale. |
| optional float threshold = 2; |
| } |
| |
| /// SINGA message |
| message RegularizerConf { |
  // The regularizer type (case insensitive), e.g., L2.
| optional string type = 1 [default = "l2"]; |
| // e.g., the weight decay for L2 regularizer |
| optional float coefficient = 2; |
| } |
| |
| // Specifies training parameters (multipliers on global learning constants, |
| // and the name and other settings used for weight sharing). |
| message ParamSpec { |
| // The names of the parameter blobs -- useful for sharing parameters among |
| // layers, but never required otherwise. To share a parameter between two |
| // layers, give it a (non-empty) name. |
| optional string name = 1; |
| |
| // Whether to require shared weights to have the same shape, or just the same |
| // count -- defaults to STRICT if unspecified. |
| /* |
| optional DimCheckMode share_mode = 2; |
| enum DimCheckMode { |
| // STRICT (default) requires that num, channels, height, width each match. |
| STRICT = 0; |
| // PERMISSIVE requires only the count (num*channels*height*width) to match. |
| PERMISSIVE = 1; |
| } |
| */ |
| |
| // The multiplier on the global learning rate for this parameter. |
| optional float lr_mult = 3 [default = 1.0]; |
| |
| // The multiplier on the global weight decay for this parameter. |
| optional float decay_mult = 4 [default = 1.0]; |
| |
  // SINGA uses this field internally. Users just configure the fillers in
  // the layer-specific conf message, following the Caffe style.
| optional FillerConf filler = 20; |
| optional ConstraintConf constraint = 21; |
| optional RegularizerConf regularizer = 22; |
| } |
| |
| enum Phase { |
| kTrain = 4; |
| kEval = 8; |
| } |
| // NOTE |
| // Update the next available ID when you add a new LayerConf field. |
| // |
// LayerConf next available layer-specific ID: 141 (last added: rnn_conf)
| message LayerConf { |
| optional string name = 1; // the layer name |
| optional string type = 2; // the layer type |
| /* repeated string bottom = 3; // the name of each bottom blob */ |
| /* repeated string top = 4; // the name of each top blob */ |
| |
| // The train / test phase for computation. |
| // optional Phase phase = 10; |
| |
| // The amount of weight to assign each top blob in the objective. |
| // Each layer assigns a default value, usually of either 0 or 1, |
| // to each top blob. |
| /* repeated float loss_weight = 5; */ |
| |
| // Specifies training parameters (multipliers on global learning constants, |
| // and the name and other settings used for weight sharing). |
| repeated ParamSpec param = 6; |
| |
| // The blobs containing the numeric parameters of the layer. |
| repeated BlobProto blobs = 7; |
| |
| // Specifies on which bottoms the backpropagation should be skipped. |
| // The size must be either 0 or equal to the number of bottoms. |
| /* repeated bool propagate_down = 11; */ |
| |
| // Rules controlling whether and when a layer is included in the network, |
| // based on the current NetState. You may specify a non-zero number of rules |
| // to include OR exclude, but not both. If no include or exclude rules are |
| // specified, the layer is always included. If the current NetState meets |
| // ANY (i.e., one or more) of the specified rules, the layer is |
| // included/excluded. |
| /* repeated NetStateRule include = 8; */ |
| /* repeated NetStateRule exclude = 9; */ |
| |
| // Confs for data pre-processing. |
| /* optional TransformationConf transform_param = 100; */ |
| |
| // Confs shared by loss layers. |
| /* optional LossConf loss_param = 101; */ |
| |
| // Layer type-specific parameters. |
| // |
| // Note: certain layers may have more than one computational engine |
| // for their implementation. These layers include an Engine type and |
| // engine parameter for selecting the implementation. |
| // The default for the engine is set by the ENGINE switch at compile-time. |
  // optional AccuracyConf accuracy_conf = 102;
| optional ArgMaxConf argmax_conf = 103; |
| optional ConcatConf concat_conf = 104; |
| optional ContrastiveLossConf contrastive_loss_conf = 105; |
| optional ConvolutionConf convolution_conf = 106; |
| optional RNNConf rnn_conf = 140; |
| // optional DataConf data_conf = 107; |
| optional DropoutConf dropout_conf = 108; |
| // optional DummyDataConf dummy_data_conf = 109; |
| optional EltwiseConf eltwise_conf = 110; |
| optional EmbedConf embed_conf = 137; |
| optional ExpConf exp_conf = 111; |
| optional FlattenConf flatten_conf = 135; |
| // optional HDF5DataConf hdf5_data_conf = 112; |
| // optional HDF5OutputConf hdf5_output_conf = 113; |
| optional HingeLossConf hinge_loss_conf = 114; |
| // optional ImageDataConf image_data_conf = 115; |
| optional InfogainLossConf infogain_loss_conf = 116; |
| // use dense_conf to replace this inner_product_conf |
| // optional InnerProductConf inner_product_conf = 117; |
| optional LogConf log_conf = 134; |
| optional LRNConf lrn_conf = 118; |
| // optional MemoryDataConf memory_data_conf = 119; |
| optional MVNConf mvn_conf = 120; |
| optional PoolingConf pooling_conf = 121; |
| optional PowerConf power_conf = 122; |
| optional PReLUConf prelu_conf = 131; |
| // optional PythonConf python_conf = 130; |
| optional ReductionConf reduction_conf = 136; |
| optional ReLUConf relu_conf = 123; |
| optional ReshapeConf reshape_conf = 133; |
| optional SigmoidConf sigmoid_conf = 124; |
| optional SoftmaxConf softmax_conf = 125; |
| optional SPPConf spp_conf = 132; |
| optional SliceConf slice_conf = 126; |
| optional TanHConf tanh_conf = 127; |
| optional ThresholdConf threshold_conf = 128; |
| optional TileConf tile_conf = 138; |
  // optional WindowDataConf window_data_conf = 129;
| |
| // Used in SINGA |
| optional DenseConf dense_conf = 117; |
| optional MetricConf metric_conf = 200; |
| optional BatchNormConf batchnorm_conf = 202; |
| optional SplitConf split_conf = 203; |
| } |
| |
| // Message that stores hyper-parameters used to apply transformation |
| // to the data layer's data |
| /* |
| message TransformationConf { |
| // For data pre-processing, we can do simple scaling and subtracting the |
| // data mean, if provided. Note that the mean subtraction is always carried |
| // out before scaling. |
| optional float scale = 1 [default = 1]; |
| // Specify if we want to randomly mirror data. |
| optional bool mirror = 2 [default = false]; |
| // Specify if we would like to randomly crop an image. |
| optional uint32 crop_size = 3 [default = 0]; |
| // mean_file and mean_value cannot be specified at the same time |
| optional string mean_file = 4; |
  // if specified, can be repeated once (would subtract it from all the channels)
| // or can be repeated the same number of times as channels |
| // (would subtract them from the corresponding channel) |
| repeated float mean_value = 5; |
| // Force the decoded image to have 3 color channels. |
| optional bool force_color = 6 [default = false]; |
  // Force the decoded image to have 1 color channel.
| optional bool force_gray = 7 [default = false]; |
| } |
| */ |
| |
| // Message that stores hyper-parameters shared by loss layers |
| message LossConf { |
| // If specified, ignore instances with the given label. |
| optional int32 ignore_label = 1; |
| // If true, normalize each batch across all instances (including spatial |
  // dimensions, but not ignored instances); else, divide by batch size only.
| optional bool normalize = 2 [default = true]; |
| } |
| |
| message MetricConf { |
| // When computing accuracy, count as correct by comparing the true label to |
| // the top k scoring classes. By default, only compare to the top scoring |
| // class (i.e. argmax). |
| optional uint32 top_k = 1 [default = 1]; |
| |
| // The "label" axis of the prediction blob, whose argmax corresponds to the |
| // predicted label -- may be negative to index from the end (e.g., -1 for the |
| // last axis). For example, if axis == 1 and the predictions are |
| // (N x C x H x W), the label blob is expected to contain N*H*W ground truth |
| // labels with integer values in {0, 1, ..., C-1}. |
| optional int32 axis = 2 [default = 1]; |
| |
| // If specified, ignore instances with the given label. |
| optional int32 ignore_label = 3; |
| } |
// Messages that store hyper-parameters used by individual layer types follow,
// in alphabetical order.

| message ArgMaxConf { |
| // If true produce pairs (argmax, maxval) |
| optional bool out_max_val = 1 [default = false]; |
| optional uint32 top_k = 2 [default = 1]; |
  // The axis along which to maximize -- may be negative to index from the
| // end (e.g., -1 for the last axis). |
| // By default ArgMaxLayer maximizes over the flattened trailing dimensions |
| // for each index of the first / num dimension. |
| optional int32 axis = 3; |
| } |
| |
| message ConcatConf { |
| // The axis along which to concatenate -- may be negative to index from the |
| // end (e.g., -1 for the last axis). Other axes must have the |
| // same dimension for all the bottom blobs. |
| // By default, ConcatLayer concatenates blobs along the "channels" axis (1). |
| optional int32 axis = 2 [default = 1]; |
| |
| // DEPRECATED: alias for "axis" -- does not support negative indexing. |
| optional uint32 concat_dim = 1 [default = 1]; |
| } |
| |
| message ContrastiveLossConf { |
| // margin for dissimilar pair |
| optional float margin = 1 [default = 1.0]; |
| // The first implementation of this cost did not exactly match the cost of |
| // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. |
| // legacy_version = false (the default) uses (margin - d)^2 as proposed in the |
| // Hadsell paper. New models should probably use this version. |
| // legacy_version = true uses (margin - d^2). This is kept to support / |
| // reproduce existing models and results |
| optional bool legacy_version = 2 [default = false]; |
| } |
| |
| message ConvolutionConf { |
| optional uint32 num_output = 1; // The number of outputs for the layer |
| optional bool bias_term = 2 [default = true]; // whether to have bias terms |
| |
| // Pad, kernel size, and stride are all given as a single value for equal |
| // dimensions in all spatial dimensions, or once per spatial dimension. |
| repeated uint32 pad = 3; // The padding size; defaults to 0 |
| repeated uint32 kernel_size = 4; // The kernel size |
| repeated uint32 stride = 6; // The stride; defaults to 1 |
| |
| // For 2D convolution only, the *_h and *_w versions may also be used to |
| // specify both spatial dimensions. |
| optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) |
| optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) |
| optional uint32 kernel_h = 11; // The kernel height (2D only) |
| optional uint32 kernel_w = 12; // The kernel width (2D only) |
| optional uint32 stride_h = 13; // The stride height (2D only) |
| optional uint32 stride_w = 14; // The stride width (2D only) |
| |
| // SINGA: not supported. |
| // optional uint32 group = 5 [default = 1]; // The group size for group conv |
| |
| optional FillerConf weight_filler = 7; // The filler for the weight |
| optional FillerConf bias_filler = 8; // The filler for the bias |
| enum Engine { |
| DEFAULT = 0; |
| CAFFE = 1; |
| CUDNN = 2; |
| } |
| optional Engine engine = 15 [default = DEFAULT]; |
| |
| // The axis to interpret as "channels" when performing convolution. |
| // Preceding dimensions are treated as independent inputs; |
| // succeeding dimensions are treated as "spatial". |
| // With (N, C, H, W) inputs, and axis == 1 (the default), we perform |
| // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for |
| // groups g>1) filters across the spatial axes (H, W) of the input. |
| // With (N, C, D, H, W) inputs, and axis == 1, we perform |
| // N independent 3D convolutions, sliding (C/g)-channels |
| // filters across the spatial axes (D, H, W) of the input. |
| // SINGA: not supported; |
| // optional int32 axis = 16 [default = 1]; |
| |
| // Whether to force use of the general ND convolution, even if a specific |
| // implementation for blobs of the appropriate number of spatial dimensions |
| // is available. (Currently, there is only a 2D-specific convolution |
| // implementation; for input blobs with num_axes != 2, this option is |
| // ignored and the ND implementation will be used.) |
| // SINGA: not supported; |
| // optional bool force_nd_im2col = 17 [default = false]; |
| |
| |
  // SINGA: added by xiangrui
  // cuDNN workspace size in MB
| optional int32 workspace_byte_limit = 50 [default = 1024]; |
  // cuDNN algorithm preference
| // options: "fastest", "limited_workspace", "no_workspace" |
| optional string prefer = 51 [default = "fastest"]; |
| } |
| |
| message RNNConf { |
| optional uint32 hidden_size = 1; // The hidden feature size |
| optional uint32 num_stacks = 2; // The number of stacked RNN layers |
| optional float dropout = 3 [default = 0]; |
| optional bool remember_state = 4 [default = false]; |
  // cuDNN input mode
| // options: "linear", "skip" |
| optional string input_mode = 7 [default = "linear"]; |
  // cuDNN direction
| // options: "unidirectional", "bidirectional" |
| optional string direction = 8 [default = "unidirectional"]; |
  // cuDNN RNN mode
| // options: "relu", "tanh", "lstm", "gru" |
| optional string rnn_mode = 9 [default = "relu"]; |
| } |
| |
| /* |
| message DataConf { |
| enum DB { |
| LEVELDB = 0; |
| LMDB = 1; |
| } |
| // Specify the data source. |
| optional string source = 1; |
| // Specify the batch size. |
| optional uint32 batch_size = 4; |
| // The rand_skip variable is for the data layer to skip a few data points |
| // to avoid all asynchronous sgd clients to start at the same point. The skip |
| // point would be set as rand_skip * rand(0,1). Note that rand_skip should not |
| // be larger than the number of keys in the database. |
| // DEPRECATED. Each solver accesses a different subset of the database. |
| optional uint32 rand_skip = 7 [default = 0]; |
| optional DB backend = 8 [default = LEVELDB]; |
| // DEPRECATED. See TransformationConf. For data pre-processing, we can do |
| // simple scaling and subtracting the data mean, if provided. Note that the |
| // mean subtraction is always carried out before scaling. |
| optional float scale = 2 [default = 1]; |
| optional string mean_file = 3; |
| // DEPRECATED. See TransformationConf. Specify if we would like to randomly |
| // crop an image. |
| optional uint32 crop_size = 5 [default = 0]; |
| // DEPRECATED. See TransformationConf. Specify if we want to randomly mirror |
| // data. |
| optional bool mirror = 6 [default = false]; |
| // Force the encoded image to have 3 color channels |
| optional bool force_encoded_color = 9 [default = false]; |
| // Prefetch queue (Number of batches to prefetch to host memory, increase if |
| // data access bandwidth varies). |
| optional uint32 prefetch = 10 [default = 4]; |
| } |
| */ |
| |
| message DropoutConf { |
| optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio |
| } |
| |
| // DummyDataLayer fills any number of arbitrarily shaped blobs with random |
| // (or constant) data generated by "Fillers" (see "message FillerConf"). |
| message DummyDataConf { |
| // This layer produces N >= 1 top blobs. DummyDataConf must specify 1 or N |
| // shape fields, and 0, 1 or N data_fillers. |
| // |
| // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. |
| // If 1 data_filler is specified, it is applied to all top blobs. If N are |
| // specified, the ith is applied to the ith top blob. |
| repeated FillerConf data_filler = 1; |
| repeated BlobShape shape = 6; |
| |
| // 4D dimensions -- deprecated. Use "shape" instead. |
| repeated uint32 num = 2; |
| repeated uint32 channels = 3; |
| repeated uint32 height = 4; |
| repeated uint32 width = 5; |
| } |
| |
| message EltwiseConf { |
| enum EltwiseOp { |
| PROD = 0; |
| SUM = 1; |
| MAX = 2; |
| } |
| optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation |
| repeated float coeff = 2; // blob-wise coefficient for SUM operation |
| |
  // Whether to use an asymptotically slower (for >2 inputs) but more stable
  // method of computing the gradient for the PROD operation. (No effect for
  // SUM op.)
| optional bool stable_prod_grad = 3 [default = true]; |
| } |
| |
| // Message that stores hyper-parameters used by EmbedLayer |
| message EmbedConf { |
| optional uint32 num_output = 1; // The number of outputs for the layer |
  // The input is given as integers to be interpreted as one-hot
  // vector indices with dimension input_dim. Hence input_dim should be
  // 1 greater than the maximum possible input value.
| optional uint32 input_dim = 2; |
| |
| optional bool bias_term = 3 [default = true]; // Whether to use a bias term |
| optional FillerConf weight_filler = 4; // The filler for the weight |
  optional FillerConf bias_filler = 5; // The filler for the bias
}
| |
| // Message that stores hyper-parameters used by ExpLayer |
| message ExpConf { |
| // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. |
| // Or if base is set to the default (-1), base is set to e, |
| // so y = exp(shift + scale * x). |
| optional float base = 1 [default = -1.0]; |
| optional float scale = 2 [default = 1.0]; |
| optional float shift = 3 [default = 0.0]; |
| } |
| |
| /// Message that stores hyper-parameters used by FlattenLayer |
| message FlattenConf { |
| // The first axis to flatten: all preceding axes are retained in the output. |
| // May be negative to index from the end (e.g., -1 for the last axis). |
| optional int32 axis = 1 [default = 1]; |
| |
| // The last axis to flatten: all following axes are retained in the output. |
| // May be negative to index from the end (e.g., the default -1 for the last |
| // axis). |
| optional int32 end_axis = 2 [default = -1]; |
| } |
| |
| /* |
| // Message that stores hyper-parameters used by HDF5DataLayer |
| message HDF5DataConf { |
| // Specify the data source. |
| optional string source = 1; |
| // Specify the batch size. |
| optional uint32 batch_size = 2; |
| |
| // Specify whether to shuffle the data. |
| // If shuffle == true, the ordering of the HDF5 files is shuffled, |
| // and the ordering of data within any given HDF5 file is shuffled, |
| // but data between different files are not interleaved; all of a file's |
| // data are output (in a random order) before moving onto another file. |
| optional bool shuffle = 3 [default = false]; |
| } |
| |
| message HDF5OutputConf { |
| optional string file_name = 1; |
| } |
| */ |
| |
| message HingeLossConf { |
| enum Norm { |
| L1 = 1; |
| L2 = 2; |
| } |
  // Specify the norm to use: L1 or L2
| optional Norm norm = 1 [default = L1]; |
| } |
| |
| /* |
| message ImageDataConf { |
| // Specify the data source. |
| optional string source = 1; |
| // Specify the batch size. |
| optional uint32 batch_size = 4 [default = 1]; |
| // The rand_skip variable is for the data layer to skip a few data points |
| // to avoid all asynchronous sgd clients to start at the same point. The skip |
| // point would be set as rand_skip * rand(0,1). Note that rand_skip should not |
| // be larger than the number of keys in the database. |
| optional uint32 rand_skip = 7 [default = 0]; |
| // Whether or not ImageLayer should shuffle the list of files at every epoch. |
| optional bool shuffle = 8 [default = false]; |
| // It will also resize images if new_height or new_width are not zero. |
| optional uint32 new_height = 9 [default = 0]; |
| optional uint32 new_width = 10 [default = 0]; |
| // Specify if the images are color or gray |
| optional bool is_color = 11 [default = true]; |
| // DEPRECATED. See TransformationConf. For data pre-processing, we can do |
| // simple scaling and subtracting the data mean, if provided. Note that the |
| // mean subtraction is always carried out before scaling. |
| optional float scale = 2 [default = 1]; |
| optional string mean_file = 3; |
| // DEPRECATED. See TransformationConf. Specify if we would like to randomly |
| // crop an image. |
| optional uint32 crop_size = 5 [default = 0]; |
| // DEPRECATED. See TransformationConf. Specify if we want to randomly mirror |
| // data. |
| optional bool mirror = 6 [default = false]; |
| optional string root_folder = 12 [default = ""]; |
| } |
| */ |
| |
| message InfogainLossConf { |
| // Specify the infogain matrix source. |
| optional string source = 1; |
| } |
| |
| message InnerProductConf { |
| optional uint32 num_output = 1; // The number of outputs for the layer |
| optional bool bias_term = 2 [default = true]; // whether to have bias terms |
| optional FillerConf weight_filler = 3; // The filler for the weight |
| optional FillerConf bias_filler = 4; // The filler for the bias |
| |
| // The first axis to be lumped into a single inner product computation; |
| // all preceding axes are retained in the output. |
| // May be negative to index from the end (e.g., -1 for the last axis). |
| optional int32 axis = 5 [default = 1]; |
| } |
| |
| message DenseConf { |
| optional uint32 num_output = 1; // The number of outputs for the layer |
| optional bool bias_term = 2 [default = true]; // whether to have bias terms |
| optional FillerConf weight_filler = 3; // The filler for the weight |
| optional FillerConf bias_filler = 4; // The filler for the bias |
| |
| // The first axis to be lumped into a single inner product computation; |
| // all preceding axes are retained in the output. |
| // May be negative to index from the end (e.g., -1 for the last axis). |
| optional int32 axis = 5 [default = 1]; |
| |
| optional bool transpose = 21 [default = false]; // whether transpose or not |
| } |
| |
| // Message that stores hyper-parameters used by LogLayer |
| message LogConf { |
| // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. |
| // Or if base is set to the default (-1), base is set to e, |
| // so y = ln(shift + scale * x) = log_e(shift + scale * x) |
| optional float base = 1 [default = -1.0]; |
| optional float scale = 2 [default = 1.0]; |
| optional float shift = 3 [default = 0.0]; |
| } |
| |
| // Message that stores hyper-parameters used by LRNLayer |
| message LRNConf { |
| optional uint32 local_size = 1 [default = 5]; |
| optional float alpha = 2 [default = 1.]; |
| optional float beta = 3 [default = 0.75]; |
| enum NormRegion { |
| ACROSS_CHANNELS = 0; |
| WITHIN_CHANNEL = 1; |
| } |
| optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; |
| optional float k = 5 [default = 1.]; |
| } |
| |
| message MemoryDataConf { |
| optional uint32 batch_size = 1; |
| optional uint32 channels = 2; |
| optional uint32 height = 3; |
| optional uint32 width = 4; |
| } |
| |
| message MVNConf { |
| // This parameter can be set to false to normalize mean only |
| optional bool normalize_variance = 1 [default = true]; |
| |
| // This parameter can be set to true to perform DNN-like MVN |
| optional bool across_channels = 2 [default = false]; |
| |
  // Epsilon to avoid division by zero while normalizing variance
| optional float eps = 3 [default = 1e-9]; |
| } |
| |
| message PoolingConf { |
| enum PoolMethod { |
| MAX = 0; |
| AVE = 1; |
| STOCHASTIC = 2; |
| } |
| optional PoolMethod pool = 1 [default = MAX]; // The pooling method |
| // Pad, kernel size, and stride are all given as a single value for equal |
| // dimensions in height and width or as Y, X pairs. |
| optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) |
| optional uint32 pad_h = 9 [default = 0]; // The padding height |
| optional uint32 pad_w = 10 [default = 0]; // The padding width |
| optional uint32 kernel_size = 2; // The kernel size (square) |
| optional uint32 kernel_h = 5; // The kernel height |
| optional uint32 kernel_w = 6; // The kernel width |
| optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) |
| optional uint32 stride_h = 7; // The stride height |
| optional uint32 stride_w = 8; // The stride width |
| /* |
| enum Engine { |
| DEFAULT = 0; |
| CAFFE = 1; |
| CUDNN = 2; |
| } |
| optional Engine engine = 11 [default = DEFAULT]; |
| */ |
| // If global_pooling then it will pool over the size of the bottom by doing |
| // kernel_h = bottom->height and kernel_w = bottom->width |
| optional bool global_pooling = 12 [default = false]; |
  // whether to propagate NaN values
| optional bool nan_prop = 53 [default = false]; |
| |
| // Added by xiangrui, 18 Oct, 2016 |
| optional bool ceil = 60 [default = false]; |
| } |
| |
| message PowerConf { |
| // PowerLayer computes outputs y = (shift + scale * x) ^ power. |
| optional float power = 1 [default = 1.0]; |
| optional float scale = 2 [default = 1.0]; |
| optional float shift = 3 [default = 0.0]; |
| } |
| /* |
| message PythonConf { |
| optional string module = 1; |
| optional string layer = 2; |
| // This value is set to the attribute `param_str` of the `PythonLayer` object |
| // in Python before calling the `setup()` method. This could be a number, |
| // string, dictionary in Python dict format, JSON, etc. You may parse this |
| // string in `setup` method and use it in `forward` and `backward`. |
| optional string param_str = 3 [default = '']; |
| // Whether this PythonLayer is shared among worker solvers during data parallelism. |
// If true, each worker solver sequentially runs forward from this layer.
| // This value should be set true if you are using it as a data layer. |
| optional bool share_in_parallel = 4 [default = false]; |
| } |
| */ |
| |
| // Message that stores hyper-parameters used by ReductionLayer |
| message ReductionConf { |
| enum ReductionOp { |
| SUM = 1; |
| ASUM = 2; |
| SUMSQ = 3; |
| MEAN = 4; |
| } |
| |
| optional ReductionOp operation = 1 [default = SUM]; // reduction operation |
| |
| // The first axis to reduce to a scalar -- may be negative to index from the |
| // end (e.g., -1 for the last axis). |
| // (Currently, only reduction along ALL "tail" axes is supported; reduction |
| // of axis M through N, where N < num_axes - 1, is unsupported.) |
| // Suppose we have an n-axis bottom Blob with shape: |
| // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). |
| // If axis == m, the output Blob will have shape |
| // (d0, d1, d2, ..., d(m-1)), |
| // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) |
| // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. |
| // If axis == 0 (the default), the output Blob always has the empty shape |
| // (count 1), performing reduction across the entire input -- |
| // often useful for creating new loss functions. |
| optional int32 axis = 2 [default = 0]; |
| |
| optional float coeff = 3 [default = 1.0]; // coefficient for output |
| } |
| |
| // Message that stores hyper-parameters used by ReLULayer |
| message ReLUConf { |
| // Allow non-zero slope for negative inputs to speed up optimization |
| // Described in: |
| // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities |
| // improve neural network acoustic models. In ICML Workshop on Deep Learning |
| // for Audio, Speech, and Language Processing. |
| optional float negative_slope = 1 [default = 0]; |
| /* |
| enum Engine { |
| DEFAULT = 0; |
| CAFFE = 1; |
| CUDNN = 2; |
| } |
| optional Engine engine = 2 [default = DEFAULT]; |
| */ |
| } |
| |
| message ReshapeConf { |
| // Specify the output dimensions. If some of the dimensions are set to 0, |
| // the corresponding dimension from the bottom layer is used (unchanged). |
| // Exactly one dimension may be set to -1, in which case its value is |
| // inferred from the count of the bottom blob and the remaining dimensions. |
| // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: |
| // |
| // layer { |
| // type: "Reshape" bottom: "input" top: "output" |
| // reshape_param { ... } |
| // } |
| // |
| // If "input" is 2D with shape 2 x 8, then the following reshape_param |
| // specifications are all equivalent, producing a 3D blob "output" with shape |
| // 2 x 2 x 4: |
| // |
| // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } |
| // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } |
| // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } |
| // reshape_param { shape { dim: -1 dim: 0 dim: 2 } } |
| // |
| optional BlobShape shape = 1; |
| |
| // axis and num_axes control the portion of the bottom blob's shape that are |
| // replaced by (included in) the reshape. By default (axis == 0 and |
| // num_axes == -1), the entire bottom blob shape is included in the reshape, |
| // and hence the shape field must specify the entire output shape. |
| // |
| // axis may be non-zero to retain some portion of the beginning of the input |
| // shape (and may be negative to index from the end; e.g., -1 to begin the |
| // reshape after the last axis, including nothing in the reshape, |
| // -2 to include only the last axis, etc.). |
| // |
| // For example, suppose "input" is a 2D blob with shape 2 x 8. |
| // Then the following ReshapeLayer specifications are all equivalent, |
| // producing a blob "output" with shape 2 x 2 x 4: |
| // |
| // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } |
| // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } |
| // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } |
| // |
| // num_axes specifies the extent of the reshape. |
| // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on |
| // input axes in the range [axis, axis+num_axes]. |
| // num_axes may also be -1, the default, to include all remaining axes |
| // (starting from axis). |
| // |
| // For example, suppose "input" is a 2D blob with shape 2 x 8. |
| // Then the following ReshapeLayer specifications are equivalent, |
| // producing a blob "output" with shape 1 x 2 x 8. |
| // |
| // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } |
| // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } |
| // reshape_param { shape { dim: 1 } num_axes: 0 } |
| // |
| // On the other hand, these would produce output blob shape 2 x 1 x 8: |
| // |
| // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } |
| // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } |
| // |
| optional int32 axis = 2 [default = 0]; |
| optional int32 num_axes = 3 [default = -1]; |
| } |
| |
| message SigmoidConf { |
| enum Engine { |
| DEFAULT = 0; |
| CAFFE = 1; |
| CUDNN = 2; |
| } |
| optional Engine engine = 1 [default = DEFAULT]; |
| } |
| |
| message SliceConf { |
| // The axis along which to slice -- may be negative to index from the end |
| // (e.g., -1 for the last axis). |
  // By default, SliceLayer slices blobs along the "channels" axis (1).
| optional int32 axis = 3 [default = 1]; |
| repeated uint32 slice_point = 2; |
| |
| // DEPRECATED: alias for "axis" -- does not support negative indexing. |
| optional uint32 slice_dim = 1 [default = 1]; |
| } |
| |
| // Message that stores hyper-parameters used by SoftmaxLayer, SoftmaxWithLossLayer |
| message SoftmaxConf { |
| enum Engine { |
| DEFAULT = 0; |
| CAFFE = 1; |
| CUDNN = 2; |
| } |
| optional Engine engine = 1 [default = DEFAULT]; |
| |
| // The axis along which to perform the softmax -- may be negative to index |
| // from the end (e.g., -1 for the last axis). |
| // Any other axes will be evaluated as independent softmaxes. |
| // optional int32 axis = 2 [default = 1]; |
| |
  /// The cuDNN algorithm preference
  /// Options are: accurate, fast and log
| optional string algorithm = 50 [default = "accurate"]; |
| } |
| |
| message TanHConf { |
| enum Engine { |
| DEFAULT = 0; |
| CAFFE = 1; |
| CUDNN = 2; |
| } |
| optional Engine engine = 1 [default = DEFAULT]; |
| } |
| |
| // Message that stores hyper-parameters used by TileLayer |
| message TileConf { |
| // The index of the axis to tile. |
| optional int32 axis = 1 [default = 1]; |
| |
| // The number of copies (tiles) of the blob to output. |
| optional int32 tiles = 2; |
| } |
| |
| // Message that stores hyper-parameters used by ThresholdLayer |
| message ThresholdConf { |
| optional float threshold = 1 [default = 0]; // Strictly positive values |
| } |
| |
| /* |
| message WindowDataConf { |
| // Specify the data source. |
| optional string source = 1; |
| // For data pre-processing, we can do simple scaling and subtracting the |
| // data mean, if provided. Note that the mean subtraction is always carried |
| // out before scaling. |
| optional float scale = 2 [default = 1]; |
| optional string mean_file = 3; |
| // Specify the batch size. |
| optional uint32 batch_size = 4; |
| // Specify if we would like to randomly crop an image. |
| optional uint32 crop_size = 5 [default = 0]; |
| // Specify if we want to randomly mirror data. |
| optional bool mirror = 6 [default = false]; |
| // Foreground (object) overlap threshold |
| optional float fg_threshold = 7 [default = 0.5]; |
| // Background (non-object) overlap threshold |
| optional float bg_threshold = 8 [default = 0.5]; |
| // Fraction of batch that should be foreground objects |
| optional float fg_fraction = 9 [default = 0.25]; |
| // Amount of contextual padding to add around a window |
| // (used only by the window_data_layer) |
| optional uint32 context_pad = 10 [default = 0]; |
| // Mode for cropping out a detection window |
| // warp: cropped window is warped to a fixed size and aspect ratio |
| // square: the tightest square around the window is cropped |
| optional string crop_mode = 11 [default = "warp"]; |
| // cache_images: will load all images in memory for faster access |
| optional bool cache_images = 12 [default = false]; |
| // append root_folder to locate images |
| optional string root_folder = 13 [default = ""]; |
| } |
| */ |
| |
| message SPPConf { |
| enum PoolMethod { |
| MAX = 0; |
| AVE = 1; |
| STOCHASTIC = 2; |
| } |
| optional uint32 pyramid_height = 1; |
| optional PoolMethod pool = 2 [default = MAX]; // The pooling method |
| enum Engine { |
| DEFAULT = 0; |
| CAFFE = 1; |
| CUDNN = 2; |
| } |
| optional Engine engine = 6 [default = DEFAULT]; |
| } |
| |
| message PReLUConf { |
| // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: |
| // Surpassing Human-Level Performance on ImageNet Classification, 2015. |
| |
| // Initial value of a_i. Default is a_i=0.25 for all i. |
| optional FillerConf filler = 1; |
  // Whether or not slope parameters are shared across channels.
| optional bool channel_shared = 2 [default = false]; |
| |
| optional string format = 20 [default = "NCHW"]; |
| } |
| |
| message BatchNormConf { |
| // Used in the moving average computation runningMean = |
| // newMean*factor + runningMean*(1-factor). |
| optional double factor = 1 [default = 0.9]; |
| } |
| |
| message SplitConf { |
  // Indicates the number of outputs
| optional int32 output_size = 1 [default = 2]; |
| } |