// blob: fc8c8b08913e6dd98705dfb52549ffd68a7fc744
/*********************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
************************************************************/
#include "../src/model/operation/batchnorm.h"
#include "gtest/gtest.h"
#include <iostream>
using namespace singa;
#ifdef USE_MKLDNN
// Runs CpuBatchNormForwardInference on a 2x2 input with fixed moving
// statistics and compares the result with hand-computed values.
//
// The expected output {1, 1, 3, 3} is consistent with normalising each column
// as alpha * (x - mean) / sqrt(var) + beta using alpha = 1, beta = 2,
// mean = {2, 3} and var = {1, 1} (any epsilon the implementation adds is
// absorbed by the 1e-4 tolerance).
TEST(OperationBatchNorm, ForwardInference) {
  const float x_data[] = {1, 2,
                          3, 4};
  Tensor in(Shape{2, 2});
  in.CopyDataFromHostPtr(x_data, 2 * 2);

  // Scale parameter.
  const float alpha_[] = {1, 1};
  Tensor alpha(Shape{2});
  alpha.CopyDataFromHostPtr(alpha_, 2);

  // Bias parameter.
  const float beta_[] = {2, 2};
  Tensor beta(Shape{2});
  beta.CopyDataFromHostPtr(beta_, 2);

  const float mean_[] = {2, 3};
  Tensor moving_mean(Shape{2});
  moving_mean.CopyDataFromHostPtr(mean_, 2);

  const float var_[] = {1, 1};
  Tensor moving_var(Shape{2});
  moving_var.CopyDataFromHostPtr(var_, 2);

  // The first handle argument is the momentum. It is written as a float
  // literal, like in the other tests of this file, rather than relying on an
  // implicit conversion from an unsigned literal.
  BatchNormHandle batch_norm_handle(0.0f, in);
  Tensor y = CpuBatchNormForwardInference(batch_norm_handle, in, alpha, beta,
                                          moving_mean, moving_var);

  // The output must keep the shape of the input.
  const float *outptr = y.data<float>();
  const auto &shape = y.shape();
  EXPECT_EQ(2u, shape.size());
  EXPECT_EQ(2u, shape[0]);
  EXPECT_EQ(2u, shape[1]);

  EXPECT_NEAR(1.0f, outptr[0], 1e-4f);
  EXPECT_NEAR(1.0f, outptr[1], 1e-4f);
  EXPECT_NEAR(3.0f, outptr[2], 1e-4f);
  EXPECT_NEAR(3.0f, outptr[3], 1e-4f);
}
// Runs CpuBatchNormForwardInference on a 4-D input of shape {1, 2, 4, 4} with
// fixed moving statistics (two entries each, matching the size of dimension 1)
// and compares all 32 output values with precomputed reference values.
TEST(OperationBatchNorm, ForwardInference4D) {
  // Number of elements in the {1, 2, 4, 4} input and output tensors.
  constexpr int kNumElems = 1 * 2 * 4 * 4;

  const float x_data[kNumElems] = {
      0.0736655, 0.0459045, 0.0779517, 0.0771059,
      0.0586862, 0.0561263, 0.0708457, 0.0977273,
      0.0405025, -0.170897, 0.0208982, 0.136865,
      -0.0367905, -0.0618205, -0.0103908, -0.0522777,
      -0.122161, -0.025427, -0.0718576, -0.185941,
      0.0166533, 0.178679, -0.0576606, -0.137817,
      0.150676, 0.153442, -0.0929899, -0.148675,
      -0.112459, -0.106284, -0.103074, -0.0668811
  };
  Tensor in(Shape{1, 2, 4, 4});
  in.CopyDataFromHostPtr(x_data, kNumElems);

  // Scale parameter.
  const float alpha_[] = {1, 1};
  Tensor alpha(Shape{2});
  alpha.CopyDataFromHostPtr(alpha_, 2);

  // Bias parameter.
  const float beta_[] = {0, 0};
  Tensor beta(Shape{2});
  beta.CopyDataFromHostPtr(beta_, 2);

  const float mean_[] = {0.02650639, -0.04573606};
  Tensor moving_mean(Shape{2});
  moving_mean.CopyDataFromHostPtr(mean_, 2);

  const float var_[] = {0.00546934, 0.01202502};
  Tensor moving_var(Shape{2});
  moving_var.CopyDataFromHostPtr(var_, 2);

  // The first handle argument is the momentum.
  BatchNormHandle batch_norm_handle(0.0f, in);
  Tensor y = CpuBatchNormForwardInference(batch_norm_handle, in, alpha, beta,
                                          moving_mean, moving_var);

  // The output must keep the shape of the input.
  const float *outptr = y.data<float>();
  const auto &shape = y.shape();
  EXPECT_EQ(4u, shape.size());
  EXPECT_EQ(1u, shape[0]);
  EXPECT_EQ(2u, shape[1]);
  EXPECT_EQ(4u, shape[2]);
  EXPECT_EQ(4u, shape[3]);

  // Reference output, in the same flat order as x_data. Stored as double so
  // the comparison uses exactly the literal values listed here.
  const double expected[kNumElems] = {
      0.637092, 0.262057, 0.694995, 0.683569,
      0.434730, 0.400147, 0.598998, 0.962152,
      0.189079, -2.66680, -0.07576, 1.490880,
      -0.85510, -1.19324, -0.49845, -1.06433,
      -0.69664, 0.185125, -0.23810, -1.27803,
      0.568704, 2.045640, -0.10869, -0.83935,
      1.790380, 1.815590, -0.43073, -0.93833,
      -0.60820, -0.55192, -0.52265, -0.19274
  };
  for (int i = 0; i < kNumElems; ++i) {
    // Report the flat index so a failing element can be identified.
    EXPECT_NEAR(expected[i], outptr[i], 1e-4f) << "at flat index " << i;
  }
}
// Runs CpuBatchNormForwardTraining on a 2x2 input with momentum 0.3 and
// zero-initialised running statistics, then checks the normalised output and
// the mean / variance tensors returned alongside it.
//
// The expected mean {1.4, 2.1} and variance {0.7, 0.7} are consistent with
// 0.7 * (per-column batch statistic), i.e. a running update of the form
// momentum * old + (1 - momentum) * batch starting from old = 0.
TEST(OperationBatchNorm, ForwardTraining) {
  const float x_data[] = {1, 2, 3, 4};
  Tensor x(Shape{2, 2});
  x.CopyDataFromHostPtr(x_data, 2 * 2);

  // Scale parameter.
  const float alpha_[] = {1, 1};
  Tensor alpha(Shape{2});
  alpha.CopyDataFromHostPtr(alpha_, 2);

  // Bias parameter.
  const float beta_[] = {0, 0};
  Tensor beta(Shape{2});
  beta.CopyDataFromHostPtr(beta_, 2);

  // Non-zero momentum, so the statistics checked below mix the initial
  // running values with the statistics of this batch.
  BatchNormHandle batch_norm_handle(0.3f, x);

  // Both running statistics start at zero.
  const float running_mean_[] = {0, 0};
  const float running_var_[] = {0, 0};
  Tensor running_mean(Shape{2});
  Tensor running_var(Shape{2});
  running_mean.CopyDataFromHostPtr(running_mean_, 2);
  running_var.CopyDataFromHostPtr(running_var_, 2);

  // The training forward pass returns the output followed by a mean tensor
  // and a variance tensor.
  auto ret1 = CpuBatchNormForwardTraining(batch_norm_handle, x, alpha, beta,
                                          running_mean, running_var);

  const float *yptr = ret1[0].data<float>();
  EXPECT_NEAR(-1.0f, yptr[0], 1e-4f);
  EXPECT_NEAR(-1.0f, yptr[1], 1e-4f);
  EXPECT_NEAR(1.0f, yptr[2], 1e-4f);
  EXPECT_NEAR(1.0f, yptr[3], 1e-4f);

  const float *meanptr = ret1[1].data<float>();
  EXPECT_NEAR(1.4f, meanptr[0], 1e-4f);
  EXPECT_NEAR(2.1f, meanptr[1], 1e-4f);

  const float *varptr = ret1[2].data<float>();
  EXPECT_NEAR(0.7f, varptr[0], 1e-4f);
  EXPECT_NEAR(0.7f, varptr[1], 1e-4f);
}
// Runs a training forward pass followed by CpuBatchNormBackwardx on a 2x2
// input and checks the three returned gradients: the input gradient (same
// shape as the input), the scale gradient and the bias gradient.
TEST(OperationBatchNorm, Backward) {
  const float input_vals[] = {1, 2, 3, 4};
  Tensor x(Shape{2, 2});
  x.CopyDataFromHostPtr(input_vals, 2 * 2);

  // Placeholder forward output handed to the backward call.
  const float output_vals[] = {9, 9, 9, 9};
  Tensor y(Shape{2, 2});
  y.CopyDataFromHostPtr(output_vals, 2 * 2);

  // Gradient arriving from the layer above.
  const float grad_vals[] = {4, 3, 2, 1};
  Tensor dy(Shape{2, 2});
  dy.CopyDataFromHostPtr(grad_vals, 2 * 2);

  const float scale_vals[] = {1, 1};
  Tensor alpha(Shape{2});
  alpha.CopyDataFromHostPtr(scale_vals, 2);

  const float bias_vals[] = {0, 0};
  Tensor beta(Shape{2});
  beta.CopyDataFromHostPtr(bias_vals, 2);

  // Momentum is 0; per the original author's note this makes the running
  // mean and variance irrelevant to the result.
  BatchNormHandle batch_norm_handle(0.0f, x);

  // The same {1, 2} values seed both the running mean and the running
  // variance.
  const float running_init[] = {1, 2};
  Tensor running_mean(Shape{2});
  Tensor running_var(Shape{2});
  running_mean.CopyDataFromHostPtr(running_init, 2);
  running_var.CopyDataFromHostPtr(running_init, 2);

  // Forward pass in training mode; entries 1 and 2 of its result are passed
  // on to the backward pass as mean and variance.
  auto forward_out = CpuBatchNormForwardTraining(
      batch_norm_handle, x, alpha, beta, running_mean, running_var);

  // Backward pass yields {dx, dscale, dbias}.
  auto grads = CpuBatchNormBackwardx(batch_norm_handle, y, dy, x, alpha, beta,
                                     forward_out[1], forward_out[2]);

  // dx: same shape as the input, all zeros.
  const auto &dx_shape = grads[0].shape();
  EXPECT_EQ(2u, dx_shape.size());
  EXPECT_EQ(2u, dx_shape[0]);
  EXPECT_EQ(2u, dx_shape[1]);
  const float *dx = grads[0].data<float>();
  EXPECT_NEAR(.0f, dx[0], 1e-4f);
  EXPECT_NEAR(.0f, dx[1], 1e-4f);
  EXPECT_NEAR(.0f, dx[2], 1e-4f);
  EXPECT_NEAR(.0f, dx[3], 1e-4f);

  // dscale and dbias each hold two entries.
  const auto &dscale_shape = grads[1].shape();
  EXPECT_EQ(2u, dscale_shape[0]);
  const auto &dbias_shape = grads[2].shape();
  EXPECT_EQ(2u, dbias_shape[0]);

  const float *dscale = grads[1].data<float>();
  EXPECT_NEAR(-2.0f, dscale[0], 1e-4f);
  EXPECT_NEAR(-2.0f, dscale[1], 1e-4f);

  const float *dbias = grads[2].data<float>();
  EXPECT_NEAR(6.0f, dbias[0], 1e-4f);
  EXPECT_NEAR(4.0f, dbias[1], 1e-4f);
}
#endif // USE_MKLDNN