# example/speech_recognition/deepspeech.cfg - mxnet - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 [common]
 # General run settings: run mode, devices, checkpoint naming, logging and random seeds.
 # mode can be one of the following: train, predict, load
 mode = train
 # comma-separated device list, e.g. gpu0,gpu1,gpu2,gpu3
 context = gpu0,gpu1,gpu2
 #context = gpu0
 # checkpoint prefix; checkpoints are saved under the checkpoints folder with this prefix
 prefix = deep_bucket
 # when mode is load or predict, the model is loaded from the file named by model_file under the checkpoints folder
 model_file = deep_bucketn_epoch0n_batch-0018
 batch_size = 12
 #batch_size=4
 # the log is written to the file named by log_filename
 log_filename = deep_bucket.log
 # save a checkpoint after every n epochs
 save_checkpoint_every_n_epoch = 1
 # NOTE(review): presumably also saves a checkpoint after every n batches - confirm in the training script
 save_checkpoint_every_n_batch = 3000
 is_bi_graphemes = True
 tensorboard_log_dir = tblog/deep_bucket
 # if random_seed is -1, the random seed is taken from the timestamp
 # NOTE(review): presumably the same rule applies to mx_random_seed - confirm
 mx_random_seed = -1
 random_seed = -1
 kvstore_option = device

 [data]
 # Dataset description files and input dimensions.
 # NOTE(review): presumably the maximum utterance duration in seconds - confirm
 max_duration = 16.0
 train_json = ./train_corpus_all.json
 # NOTE(review): test_json and val_json point to the same file
 test_json = ./test_corpus.json
 val_json = ./test_corpus.json

 language = en
 # input dimensions
 width = 161
 height = 1
 channel = 1
 stride = 1

 [arch]
 # Network architecture: two convolution layers, the RNN stack, the rear
 # fully-connected layers, batch normalization and bucketing.
 channel_num = 32
 conv_layer1_filter_dim = [11, 41]
 conv_layer1_stride = [2, 2]
 conv_layer2_filter_dim = [11, 21]
 conv_layer2_stride = [1, 2]

 # NOTE(review): num_hidden_rnn_list has one entry per RNN layer here (5 entries for
 # num_rnn_layer = 5); presumably the two must stay in sync - confirm
 num_rnn_layer = 5
 num_hidden_rnn_list = [1760, 1760, 1760, 1760, 1760]
 num_hidden_proj = 0

 # NOTE(review): the two rear FC lists have one entry each for num_rear_fc_layers = 1;
 # presumably they must stay in sync - confirm
 num_rear_fc_layers = 1
 num_hidden_rear_fc_list = [1760]
 act_type_rear_fc_list = ["relu"]

 # rnn_type: lstm, bilstm, gru, bigru
 rnn_type = bigru
 # lstm_type: vanilla_lstm or fc_lstm (no effect when rnn_type is gru or bigru)
 lstm_type = fc_lstm
 is_batchnorm = True
 is_bucketing = True
 buckets = [200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600]

 [train]
 # Training hyper-parameters, progress reporting and metadata/feature file options.
 num_epoch = 70
 learning_rate = 0.0003
 # constant factor used for learning rate annealing
 # NOTE(review): whether the rate is multiplied or divided by this factor is not visible here - confirm
 learning_rate_annealing = 1.1
 initializer = Xavier
 init_scale = 2
 factor_type = in
 # show progress every n batches
 show_every = 100
 save_optimizer_states = True
 normalize_target_k = 100000
 # overwrite meta files (feats_mean, feats_std, unicode_en_baidu_bi_graphemes.csv)
 overwrite_meta_files = True
 overwrite_bi_graphemes_dictionary = False
 # save features extracted from the sound files as CSV files; this can take a lot of disk space
 save_feature_as_csvfile = False
 enable_logging_train_metric = True
 enable_logging_validation_metric = True

 [load]
 # NOTE(review): presumably consulted when resuming from a saved checkpoint (mode = load) - confirm
 load_optimizer_states = True
 is_start_from_batch = True

 [optimizer]
 # Optimizer selection and its parameters.
 optimizer = sgd
 # define parameters for the optimizer
 # strings inside optimizer_params_dictionary must be wrapped in " (double quotes), not ' (single quotes)
 # example values for other optimizers are kept commented out below
 # sgd/nag
 optimizer_params_dictionary={"momentum":0.9}
 # dcasgd
 # optimizer_params_dictionary={"momentum":0.9, "lamda":1.0}
 # adam
 # optimizer_params_dictionary={"beta1":0.9,"beta2":0.999}
 # adagrad
 # optimizer_params_dictionary={"eps":1e-08}
 # rmsprop
 # optimizer_params_dictionary={"gamma1":0.9, "gamma2":0.9,"epsilon":1e-08}
 # adadelta
 # optimizer_params_dictionary={"rho":0.95, "epsilon":1e-08}
 # set to 0 to disable gradient clipping
 clip_gradient = 100
 weight_decay = 0.
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	[common]
	# General run settings: run mode, devices, checkpoint naming, logging and random seeds.
	# mode can be one of the following: train, predict, load
	mode = train
	# comma-separated device list, e.g. gpu0,gpu1,gpu2,gpu3
	context = gpu0,gpu1,gpu2
	#context = gpu0
	# checkpoint prefix; checkpoints are saved under the checkpoints folder with this prefix
	prefix = deep_bucket
	# when mode is load or predict, the model is loaded from the file named by model_file under the checkpoints folder
	model_file = deep_bucketn_epoch0n_batch-0018
	batch_size = 12
	#batch_size=4
	# the log is written to the file named by log_filename
	log_filename = deep_bucket.log
	# save a checkpoint after every n epochs
	save_checkpoint_every_n_epoch = 1
	# NOTE(review): presumably also saves a checkpoint after every n batches - confirm in the training script
	save_checkpoint_every_n_batch = 3000
	is_bi_graphemes = True
	tensorboard_log_dir = tblog/deep_bucket
	# if random_seed is -1, the random seed is taken from the timestamp
	# NOTE(review): presumably the same rule applies to mx_random_seed - confirm
	mx_random_seed = -1
	random_seed = -1
	kvstore_option = device

	[data]
	# Dataset description files and input dimensions.
	# NOTE(review): presumably the maximum utterance duration in seconds - confirm
	max_duration = 16.0
	train_json = ./train_corpus_all.json
	# NOTE(review): test_json and val_json point to the same file
	test_json = ./test_corpus.json
	val_json = ./test_corpus.json

	language = en
	# input dimensions
	width = 161
	height = 1
	channel = 1
	stride = 1

	[arch]
	# Network architecture: two convolution layers, the RNN stack, the rear
	# fully-connected layers, batch normalization and bucketing.
	channel_num = 32
	conv_layer1_filter_dim = [11, 41]
	conv_layer1_stride = [2, 2]
	conv_layer2_filter_dim = [11, 21]
	conv_layer2_stride = [1, 2]

	# NOTE(review): num_hidden_rnn_list has one entry per RNN layer here (5 entries for
	# num_rnn_layer = 5); presumably the two must stay in sync - confirm
	num_rnn_layer = 5
	num_hidden_rnn_list = [1760, 1760, 1760, 1760, 1760]
	num_hidden_proj = 0

	# NOTE(review): the two rear FC lists have one entry each for num_rear_fc_layers = 1;
	# presumably they must stay in sync - confirm
	num_rear_fc_layers = 1
	num_hidden_rear_fc_list = [1760]
	act_type_rear_fc_list = ["relu"]

	# rnn_type: lstm, bilstm, gru, bigru
	rnn_type = bigru
	# lstm_type: vanilla_lstm or fc_lstm (no effect when rnn_type is gru or bigru)
	lstm_type = fc_lstm
	is_batchnorm = True
	is_bucketing = True
	buckets = [200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600]

	[train]
	# Training hyper-parameters, progress reporting and metadata/feature file options.
	num_epoch = 70
	learning_rate = 0.0003
	# constant factor used for learning rate annealing
	# NOTE(review): whether the rate is multiplied or divided by this factor is not visible here - confirm
	learning_rate_annealing = 1.1
	initializer = Xavier
	init_scale = 2
	factor_type = in
	# show progress every n batches
	show_every = 100
	save_optimizer_states = True
	normalize_target_k = 100000
	# overwrite meta files (feats_mean, feats_std, unicode_en_baidu_bi_graphemes.csv)
	overwrite_meta_files = True
	overwrite_bi_graphemes_dictionary = False
	# save features extracted from the sound files as CSV files; this can take a lot of disk space
	save_feature_as_csvfile = False
	enable_logging_train_metric = True
	enable_logging_validation_metric = True

	[load]
	# NOTE(review): presumably consulted when resuming from a saved checkpoint (mode = load) - confirm
	load_optimizer_states = True
	is_start_from_batch = True

	[optimizer]
	# Optimizer selection and its parameters.
	optimizer = sgd
	# define parameters for the optimizer
	# strings inside optimizer_params_dictionary must be wrapped in " (double quotes), not ' (single quotes)
	# example values for other optimizers are kept commented out below
	# sgd/nag
	optimizer_params_dictionary={"momentum":0.9}
	# dcasgd
	# optimizer_params_dictionary={"momentum":0.9, "lamda":1.0}
	# adam
	# optimizer_params_dictionary={"beta1":0.9,"beta2":0.999}
	# adagrad
	# optimizer_params_dictionary={"eps":1e-08}
	# rmsprop
	# optimizer_params_dictionary={"gamma1":0.9, "gamma2":0.9,"epsilon":1e-08}
	# adadelta
	# optimizer_params_dictionary={"rho":0.95, "epsilon":1e-08}
	# set to 0 to disable gradient clipping
	clip_gradient = 100
	weight_decay = 0.