| # Top-level settings for the "char-rnn" training job. |
| name:"char-rnn" |
| # Number of training steps to run, and the display interval (presumably also counted in steps; confirm against the job schema). |
| train_steps: 100000 |
| disp_freq: 100 |
| #debug: true |
| # NOTE(review): presumably the id of the GPU device to run on; confirm against the job schema. |
| gpu: 0 |
| # Selects the algorithm used to train on one batch. kBPTT presumably stands for back-propagation through time; confirm against the enum definition. |
| train_one_batch { |
| alg: kBPTT |
| } |
| |
| # Parameter updater: type kRMSProp with rho 0.95, a kStep learning-rate schedule, and clip bounds of -5 and 5. |
| updater { |
| type: kRMSProp |
| rmsprop_conf { |
| rho: 0.95 |
| } |
| # Schedule starts from base_lr 0.002. Presumably the rate is multiplied by gamma once every change_freq steps (TODO confirm against the schema). |
| learning_rate { |
| type: kStep |
| base_lr: 0.002 |
| step_conf { |
| gamma: 0.97 |
| change_freq: 2000 |
| } |
| } |
| # Symmetric lower/upper bounds. NOTE(review): presumably applied as gradient clipping; confirm against the updater schema. |
| clip_low: -5 |
| clip_high: 5 |
| } |
| |
| neuralnet { |
| unroll_len: 50 |
| # Input layer "data" (type kCharRNN), configured with a text file path, a vocabulary file path, a batch size of 50 and an unroll length of 50. |
| layer { |
| name: "data" |
| type: kCharRNN |
| # NOTE(review): the layer-level unroll_len is 1 while char_rnn_conf below uses 50. This looks intentional, since the layers reading "data" declare kUnrollOneToAll, but confirm. |
| unroll_len: 1 |
| char_rnn_conf { |
| path: "examples/char-rnn/linux_input.txt" |
| vocab_path:"examples/char-rnn/vocab.txt" |
| batchsize: 50 |
| unroll_len: 50 |
| } |
| } |
| # Layer "onehot" (type kOneHot): takes "data" as its source, connected with kUnrollOneToAll. |
| layer { |
| name: "onehot" |
| type: kOneHot |
| srclayers: "data" |
| unroll_conn_type: kUnrollOneToAll |
| onehot_conf { |
| # Same value as num_output of layer "ip1". Presumably must equal the number of entries in the vocabulary file (TODO confirm). |
| vocab_size: 101 |
| } |
| } |
| |
| # Layer "label" (type kRNNLabel): takes "data" as its source with kUnrollOneToAll and is the second source of the "loss" layer. |
| layer { |
| name: "label" |
| type: kRNNLabel |
| srclayers: "data" |
| unroll_conn_type: kUnrollOneToAll |
| } |
| |
| # Layer "gru1" (type kGRU): reads "onehot", has a hidden dimension of 512, and declares nine parameters. |
| # Every parameter is initialised with kUniform in the range [-0.08, 0.08]. |
| # NOTE(review): the z_/r_/c_ prefixes presumably denote the update gate, reset gate and candidate state, and the |
| # _hx/_hh/_b suffixes presumably denote input-to-hidden weights, hidden-to-hidden weights and bias; confirm against the GRU layer implementation. |
| layer { |
| name: "gru1" |
| type: kGRU |
| srclayers: "onehot" |
| gru_conf { |
| dim_hidden: 512 |
| } |
| # The three *_hx parameters. |
| param { |
| name: "z_hx" |
| init { |
| type: kUniform |
| low: -0.08 |
| high: 0.08 |
| } |
| } |
| param { |
| name: "r_hx" |
| init { |
| type: kUniform |
| low: -0.08 |
| high: 0.08 |
| } |
| } |
| param { |
| name: "c_hx" |
| init { |
| type: kUniform |
| low: -0.08 |
| high: 0.08 |
| } |
| } |
| # The three *_hh parameters. |
| param { |
| name: "z_hh" |
| init { |
| type: kUniform |
| low: -0.08 |
| high: 0.08 |
| } |
| } |
| param { |
| name: "r_hh" |
| init { |
| type: kUniform |
| low: -0.08 |
| high: 0.08 |
| } |
| } |
| param { |
| name: "c_hh" |
| init { |
| type: kUniform |
| low: -0.08 |
| high: 0.08 |
| } |
| } |
| # The three *_b parameters; these use the same uniform range as the parameters above rather than a constant. |
| param { |
| name: "z_b" |
| init { |
| type: kUniform |
| low: -0.08 |
| high: 0.08 |
| } |
| } |
| param { |
| name: "r_b" |
| init { |
| type: kUniform |
| low: -0.08 |
| high: 0.08 |
| } |
| } |
| param { |
| name: "c_b" |
| init { |
| type: kUniform |
| low: -0.08 |
| high: 0.08 |
| } |
| } |
| |
| } |
| # layer { |
| # name: "gru2" |
| # type: kGRU |
| # srclayers: "gru1" |
| # gru_conf { |
| # dim_hidden: 512 |
| # } |
| # param { |
| # name: "z_hx2" |
| # init { |
| # type: kUniform |
| # low: -0.08 |
| # high: 0.08 |
| # } |
| # } |
| # param { |
| # name: "r_hx2" |
| # init { |
| # type: kUniform |
| # low: -0.08 |
| # high: 0.08 |
| # } |
| # } |
| # param { |
| # name: "c_hx2" |
| # init { |
| # type: kUniform |
| # low: -0.08 |
| # high: 0.08 |
| # } |
| # } |
| # param { |
| # name: "z_hh2" |
| # init { |
| # type: kUniform |
| # low: -0.08 |
| # high: 0.08 |
| # } |
| # } |
| # param { |
| # name: "r_hh2" |
| # init { |
| # type: kUniform |
| # low: -0.08 |
| # high: 0.08 |
| # } |
| # } |
| # param { |
| # name: "c_hh2" |
| # init { |
| # type: kUniform |
| # low: -0.08 |
| # high: 0.08 |
| # } |
| # } |
| # param { |
| # name: "z_b2" |
| # init { |
| # type: kUniform |
| # low: -0.08 |
| # high: 0.08 |
| # } |
| # } |
| # param { |
| # name: "r_b2" |
| # init { |
| # type: kUniform |
| # low: -0.08 |
| # high: 0.08 |
| # } |
| # } |
| # param { |
| # name: "c_b2" |
| # init { |
| # type: kUniform |
| # low: -0.08 |
| # high: 0.08 |
| # } |
| # } |
| # } |
| # |
| # Layer "ip1" (type kInnerProduct): reads "gru1" and produces 101 outputs, the same number as vocab_size of layer "onehot". |
| # The commented-out "gru2" layer is not wired in; if it is re-enabled, this layer's srclayers presumably has to point at "gru2" instead (confirm). |
| layer { |
| name: "ip1" |
| type: kInnerProduct |
| srclayers: "gru1" |
| innerproduct_conf { |
| num_output: 101 |
| } |
| # Parameters "w" and "b" are both initialised with kUniform in [-0.08, 0.08]; presumably the weight matrix and the bias (confirm). |
| param { |
| name: "w" |
| init { |
| type: kUniform |
| low: -0.08 |
| high: 0.08 |
| } |
| } |
| param { |
| name: "b" |
| init { |
| type: kUniform |
| low: -0.08 |
| high: 0.08 |
| } |
| } |
| } |
| # Layer "loss" (type kSoftmaxLoss): takes two sources, "ip1" first and "label" second. |
| # NOTE(review): the order of the srclayers entries presumably matters (predictions, then targets); keep it unless the layer implementation says otherwise. |
| layer { |
| name: "loss" |
| type: kSoftmaxLoss |
| srclayers: "ip1" |
| srclayers: "label" |
| } |
| } |
| |
| # Cluster settings. The workspace is the same examples/char-rnn/ directory that holds the input and vocabulary files referenced by the "data" layer. |
| # NOTE(review): presumably the directory where job output such as checkpoints is written; confirm against the cluster schema. |
| cluster { |
| workspace: "examples/char-rnn/" |
| } |