| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # pylint: skip-file |
| import mxnet as mx |
| from mxnet.test_utils import get_mnist_iterator |
| import numpy as np |
| import logging |
| |
| |
class Softmax(mx.operator.CustomOp):
    def forward(self, is_train, req, in_data, out_data, aux):
        x = in_data[0].asnumpy()
        # Numerically stable softmax: subtract the per-row max before
        # exponentiating so that exp() cannot overflow.
        y = np.exp(x - x.max(axis=1).reshape((x.shape[0], 1)))
        y /= y.sum(axis=1).reshape((x.shape[0], 1))
        self.assign(out_data[0], req[0], mx.nd.array(y))
| |
    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        label = in_data[1].asnumpy().ravel().astype(np.int64)
        y = out_data[0].asnumpy()
        # For softmax followed by cross-entropy loss, the gradient w.r.t.
        # the logits is softmax(x) - one_hot(label).
        y[np.arange(label.shape[0]), label] -= 1.0
        self.assign(in_grad[0], req[0], mx.nd.array(y))
| |
@mx.operator.register("softmax")
class SoftmaxProp(mx.operator.CustomOpProp):
    def __init__(self):
        # This op is a loss layer, so no gradient arrives from above:
        # need_top_grad=False tells MXNet not to expect out_grad.
        super(SoftmaxProp, self).__init__(need_top_grad=False)
| |
| def list_arguments(self): |
| return ['data', 'label'] |
| |
| def list_outputs(self): |
| return ['output'] |
| |
    def infer_shape(self, in_shape):
        data_shape = in_shape[0]
        # One integer class label per example in the batch.
        label_shape = (in_shape[0][0],)
        output_shape = in_shape[0]
        return [data_shape, label_shape], [output_shape], []
| |
    def infer_type(self, in_type):
        # Inputs keep their declared dtypes; the output follows the data input.
        return in_type, [in_type[0]], []
| |
| def create_operator(self, ctx, shapes, dtypes): |
| return Softmax() |
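
# Quick imperative sanity check of the op registered above (a sketch;
# mx.nd.Custom dispatches to the "softmax" prop registered above, and
# every row of the output should sum to one):
x_check = mx.nd.random.uniform(shape=(2, 10))
label_check = mx.nd.array([1, 3])
out_check = mx.nd.Custom(x_check, label_check, op_type='softmax')
assert np.allclose(out_check.asnumpy().sum(axis=1), 1.0)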
| |
| # define mlp |
| |
| data = mx.symbol.Variable('data') |
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
# The built-in loss-layer equivalent would be:
# mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')
mlp = mx.symbol.Custom(data=fc3, name='softmax', op_type='softmax')
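
# Since the Custom symbol is named 'softmax' and SoftmaxProp lists
# ['data', 'label'] as its inputs, the label variable is exposed as
# 'softmax_label', which is exactly Module's default label name:
assert 'softmax_label' in mlp.list_arguments()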
| |
| # data |
| |
train, val = get_mnist_iterator(batch_size=100, input_shape=(784,))
| |
| # train |
| |
| logging.basicConfig(level=logging.DEBUG) |
| |
# MXNET_CPU_WORKER_NTHREADS must be greater than 1 for custom op to work on CPU
context = mx.cpu()
# Uncomment this line to train on GPU
# context = mx.gpu(0)
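
# The environment variable has to be set before the process starts,
# e.g. launch the script like this (a sketch; substitute the real
# script name):
#
#   MXNET_CPU_WORKER_NTHREADS=2 python custom_softmax.py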
| |
| mod = mx.mod.Module(mlp, context=context) |
| |
| mod.fit(train_data=train, eval_data=val, optimizer='sgd', |
| optimizer_params={'learning_rate':0.1, 'momentum': 0.9, 'wd': 0.00001}, |
| num_epoch=10, batch_end_callback=mx.callback.Speedometer(100, 100)) |
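
# Report final validation accuracy (a sketch; Module.score resets the
# iterator and returns a list of (metric_name, value) pairs):
print(mod.score(val, 'acc'))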