blob: e574ae2b4379206de449af2050bfb4ab42ae41f7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file fully_conn_perf.cc
* \brief Sample for running C++ performance tests on a single operator. This approach is also
* useful for profiling with VTune or gprof, since it avoids the "noise" of Python and the executor
* \author Chris Olivier
*/
#include <dmlc/logging.h>
#include <mxnet/tensor_blob.h>
#include <nnvm/tuple.h>
#include "../../src/operator/nn/fully_connected-inl.h"
#include "../include/test_op_runner.h"
#include "../include/test_core_op.h"
using namespace mxnet;
/*! \brief Ordered list of (name, value) argument pairs, with both parts held as strings */
using kwargs_t = std::vector<std::pair<std::string, std::string>>;

/*! \brief Base argument list that every test in this file copies before adding operator names */
const kwargs_t basic_fullyconn_args = {
  {"num_hidden", "250"},
  {"no_bias", "true"}
};
/*!
 * \brief Sanity check: one verbose bidirectional run of FullyConnected on a small shape pair
 */
TEST(FULLY_CONNECTED, ExecuteBidirectionalFullyConnected) {
  using Executor = test::op::CoreOpExecutor<float>;

  // Start from the shared base arguments; the operator names are added below.
  kwargs_t op_args = basic_fullyconn_args;

  test::op::CoreOperatorRunner<float> runner;
  runner.set_verbose(true);

  // Attach the forward and backward operator names to the argument list.
  op_args = Executor::ArgsWithOpName(op_args, "FullyConnected", "_backward_FullyConnected");

  // The second shape's leading dimension (250) matches the "num_hidden" argument.
  const TShape data_shape({5, 5});
  const TShape weight_shape({250, 5});
  runner.RunBidirectional(false, { data_shape, weight_shape }, op_args, 1);
}
/*!
 * \brief CPU timing test: one bidirectional run, then a TimingTest call per input shape
 */
TEST(FULLY_CONNECTED, FullyConnectedTimingCPU) {
  using Executor = test::op::CoreOpExecutor<float>;

  kwargs_t op_args = basic_fullyconn_args;
  test::op::CoreOperatorRunner<float> runner;
  op_args = Executor::ArgsWithOpName(op_args, "FullyConnected", "_backward_FullyConnected");

  // Single bidirectional run on a fixed shape pair before any timing is done.
  {
    const TShape initial_data({10, 10, 10, 10});
    const TShape initial_weight({250, 1000});
    runner.RunBidirectional(false, { initial_data, initial_weight }, op_args, 1);
  }

  // A performance run covers the longer shape list; otherwise only two shapes are timed.
  const std::vector<TShape> timed_shapes = test::performance_run
    ? std::vector<TShape>{
        {1, 1, 28, 28},
        {1, 3, 28, 28},
        {50, 1, 18, 32},
        {50, 3, 18, 32},
        {20, 3, 128, 128}
      }
    : std::vector<TShape>{
        {1, 1, 28, 28},
        {50, 3, 18, 32},
      };

  for (const TShape& data_shape : timed_shapes) {
    // Second dimension of the second shape = product of data_shape's dims from index 1 onward.
    const nnvm::dim_t flattened =
      static_cast<nnvm::dim_t>(data_shape.ProdShape(1, data_shape.ndim()));
    const TShape weight_shape({250, flattened});

    // The operator names are re-applied on every iteration, exactly as before.
    op_args = Executor::ArgsWithOpName(op_args, "FullyConnected", "_backward_FullyConnected");
    runner.TimingTest("Fully connected CPU", false, false, op_args, 2, 10,
                      { data_shape, weight_shape }, false);
  }
}
#if MXNET_USE_CUDA == 1
/*!
 * \brief GPU timing test: one bidirectional run, then a TimingTest call per input shape
 */
TEST(FULLY_CONNECTED, FullyConnectedTimingGPU) {
  using Executor = test::op::CoreOpExecutor<float>;

  kwargs_t op_args = basic_fullyconn_args;
  test::op::CoreOperatorRunner<float> runner;
  op_args = Executor::ArgsWithOpName(op_args, "FullyConnected", "_backward_FullyConnected");

  // Single bidirectional run on a fixed shape pair before any timing is done.
  // NOTE(review): this call passes `false` as its first argument while TimingTest below
  // passes `true`; if that argument selects the GPU, confirm the CPU run here is intended.
  {
    const TShape initial_data({10, 10, 10, 10});
    const TShape initial_weight({250, 1000});
    runner.RunBidirectional(false, { initial_data, initial_weight }, op_args, 1);
  }

  // A performance run covers the longer shape list; otherwise only two shapes are timed.
  const std::vector<TShape> timed_shapes = test::performance_run
    ? std::vector<TShape>{
        {1, 1, 28, 28},
        {1, 3, 28, 28},
        {50, 1, 18, 32},
        {50, 3, 18, 32},
        {20, 3, 128, 128}
      }
    : std::vector<TShape>{
        {1, 1, 28, 28},
        {50, 3, 18, 32},
      };

  for (const TShape& data_shape : timed_shapes) {
    // Second dimension of the second shape = product of data_shape's dims from index 1 onward.
    const nnvm::dim_t flattened =
      static_cast<nnvm::dim_t>(data_shape.ProdShape(1, data_shape.ndim()));
    const TShape weight_shape({250, flattened});

    // The operator names are re-applied on every iteration, exactly as before.
    op_args = Executor::ArgsWithOpName(op_args, "FullyConnected", "_backward_FullyConnected");
    runner.TimingTest("Fully connected GPU", true, false, op_args, 2, 10,
                      { data_shape, weight_shape }, false);
  }
}
#endif // MXNET_USE_CUDA == 1