# blob: c48d752d2d0ccc4b720fe86c9f25b7457e097bab [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import mxnet as mx
def get_symbol_atari(act_dim):
    """Build the symbol graph for the Atari policy/value network.

    A shared trunk (float32 cast, two convolution + ReLU stages, a flatten,
    and a 256-unit fully connected + ReLU layer) feeds two heads that are
    both built on the same trunk features: a policy head and a one-unit
    value head.

    Parameters
    ----------
    act_dim : int
        Width (``num_hidden``) of the ``fc_policy`` layer; presumably the
        number of discrete actions -- confirm against the caller.

    Returns
    -------
    mx.symbol.Symbol
        A ``Group`` whose outputs are, in order: ``policy``
        (``SoftmaxOutput`` over the policy logits), ``entropy``
        (``SoftmaxActivation`` over the same logits) and ``value``
        (``LinearRegressionOutput`` over the ``fc_value`` layer).
    """
    sym = mx.symbol

    # Shared trunk. The input variable is cast to float32 before any layer
    # sees it (presumably because frames arrive as integer pixels -- confirm
    # against the data iterator).
    features = sym.Variable('data')
    features = sym.Cast(data=features, dtype='float32')

    features = sym.Convolution(
        data=features,
        name='conv1',
        kernel=(8, 8),
        stride=(4, 4),
        num_filter=16,
    )
    features = sym.Activation(data=features, name='relu1', act_type='relu')

    features = sym.Convolution(
        data=features,
        name='conv2',
        kernel=(4, 4),
        stride=(2, 2),
        num_filter=32,
    )
    features = sym.Activation(data=features, name='relu2', act_type='relu')

    # Layer names jump from 2 to 4 ('fc4' / 'relu4'); they are kept verbatim.
    features = sym.Flatten(data=features)
    features = sym.FullyConnected(data=features, name='fc4', num_hidden=256)
    features = sym.Activation(data=features, name='relu4', act_type='relu')

    # Policy head: a single set of logits feeds two separate outputs.
    policy_logits = sym.FullyConnected(
        data=features,
        name='fc_policy',
        num_hidden=act_dim,
    )
    # out_grad=True makes the op multiply its gradient by the incoming output
    # gradient (see the MXNet SoftmaxOutput docs); presumably the training
    # loop uses that to weight the policy gradient -- confirm against it.
    policy_out = sym.SoftmaxOutput(
        data=policy_logits,
        name='policy',
        out_grad=True,
    )
    # Despite its name this output is just the softmax of the logits; no
    # entropy is computed inside this graph.
    entropy_out = sym.SoftmaxActivation(data=policy_logits, name='entropy')

    # Value head: one linear unit on the shared features with a regression
    # output on top.
    value_out = sym.FullyConnected(data=features, name='fc_value', num_hidden=1)
    value_out = sym.LinearRegressionOutput(data=value_out, name='value')

    return sym.Group([policy_out, entropy_out, value_out])