/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file ndarray_function-inl.h
* \brief The real implementation of NDArray functions.
*/
#ifndef MXNET_NDARRAY_NDARRAY_FUNCTION_INL_H_
#define MXNET_NDARRAY_NDARRAY_FUNCTION_INL_H_
#include <vector>
#include "./ndarray_function.h"
// This file is included twice, once by the CPU and once by the GPU compilation unit.
// Macros to help specialize the evaluation functions.
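// DECL_TERNARY(XPU, OP, FUN) specializes Eval<XPU, OP> for operators taking
// three input blobs, forwarding to the implementation function FUN.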
#ifndef DECL_TERNARY
#define DECL_TERNARY(XPU, OP, FUN) \
template <> \
void Eval<XPU, OP>( \
const TBlob& lhs, const TBlob& mhs, const TBlob& rhs, TBlob* ret, RunContext ctx) { \
FUN<XPU, OP>(lhs, mhs, rhs, ret, ctx); \
}
#endif
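// DECL_BINARY(XPU, OP, FUN) specializes Eval<XPU, OP> for operators taking
// two input blobs.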
#ifndef DECL_BINARY
#define DECL_BINARY(XPU, OP, FUN) \
template <> \
void Eval<XPU, OP>(const TBlob& lhs, const TBlob& rhs, TBlob* ret, RunContext ctx) { \
FUN<XPU, OP>(lhs, rhs, ret, ctx); \
}
#endif
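// DECL_BINARY_LAUNCH(XPU, OP) specializes BinaryOpKernelImpl<OP, XPU> to
// forward to the elementwise kernel launcher BinaryOpKernelLaunch defined below.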
#ifndef DECL_BINARY_LAUNCH
#define DECL_BINARY_LAUNCH(XPU, OP) \
template <> \
void BinaryOpKernelImpl<OP, XPU>( \
mshadow::Stream<XPU> * s, const TBlob& lhs, const TBlob& rhs, TBlob* out) { \
BinaryOpKernelLaunch<OP>(s, lhs, rhs, out); \
}
#endif
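// DECL_SCALAR(XPU, OP, FUN, REVERSE) specializes Eval<XPU, OP, REVERSE> for
// tensor-scalar operators; REVERSE controls the operand order (see EvalScalar_).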
#ifndef DECL_SCALAR
#define DECL_SCALAR(XPU, OP, FUN, REVERSE) \
template <> \
void Eval<XPU, OP, REVERSE>(const TBlob& lhs, const real_t& rhs, TBlob* ret, RunContext ctx) { \
FUN<XPU, OP, REVERSE>(lhs, rhs, ret, ctx); \
}
#endif
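// Select the device for this compilation pass: nvcc defines __CUDACC__, so that
// pass generates the gpu specializations; the plain C++ pass generates cpu.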
#if defined(__CUDACC__)
#define DEVICE gpu
#else
#define DEVICE cpu
#endif
namespace mxnet {
namespace ndarray {
// Implementations of the evaluation functions declared in ndarray_function.h.
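// Elementwise binary operation: ret = OP(lhs, rhs); all blobs must share one data type.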
template <typename xpu, typename OP>
void EvalBinary_(const TBlob& lhs, const TBlob& rhs, TBlob* ret, RunContext ctx) {
using namespace mshadow::expr;
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
  CHECK_EQ(ret->type_flag_, lhs.type_flag_) << "Only supports input/output with the same data type";
  CHECK_EQ(ret->type_flag_, rhs.type_flag_) << "Only supports input/output with the same data type";
MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
ret->FlatTo2D<xpu, DType>(s) =
F<typename OP::mshadow_op>(lhs.FlatTo2D<xpu, DType>(s), rhs.FlatTo2D<xpu, DType>(s));
});
}
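// One-hot encoding (deprecated in favor of one_hot): ret[i][j] = 1 when
// index[i] == j, else 0; the number of classes is taken from rhs.shape_[1].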
template <typename xpu, typename OP>
void EvalOneHot_(const TBlob& index, const TBlob& rhs, TBlob* ret, RunContext ctx) {
LOG(INFO) << "The operator onehot_encode is deprecated; use one_hot instead.";
using namespace mshadow::expr;
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
// TODO(eric): support mixed type encoding, i.e. int index and float rhs.
  CHECK_EQ(ret->type_flag_, mshadow::default_type_flag)
      << "one_hot_encode only supports float32 as input/output";
  CHECK_EQ(rhs.type_flag_, mshadow::default_type_flag)
      << "one_hot_encode only supports float32 as input/output";
  CHECK_EQ(index.type_flag_, mshadow::default_type_flag)
      << "one_hot_encode only supports float32 as input/output";
ret->get<xpu, 2, real_t>(s) = one_hot_encode(index.get<xpu, 1, real_t>(s), rhs.shape_[1]);
}
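// Row-wise selection: ret[i] = lhs[i][rhs[i]].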
template <typename xpu, typename OP>
void EvalMatChooseRowElem_(const TBlob& lhs, const TBlob& rhs, TBlob* ret, RunContext ctx) {
using namespace mshadow::expr;
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
// TODO(eric): support mixed type choose, i.e. int index and float rhs.
  CHECK_EQ(ret->type_flag_, mshadow::default_type_flag)
      << "mat_choose_row_element only supports float32 as input/output";
  CHECK_EQ(rhs.type_flag_, mshadow::default_type_flag)
      << "mat_choose_row_element only supports float32 as input/output";
  CHECK_EQ(lhs.type_flag_, mshadow::default_type_flag)
      << "mat_choose_row_element only supports float32 as input/output";
ret->get<xpu, 1, real_t>(s) =
mat_choose_row_element(lhs.get<xpu, 2, real_t>(s), rhs.get<xpu, 1, real_t>(s));
}
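// Row-wise fill: copies lhs into ret, then sets ret[i][rhs[i]] = mhs[i]
// (one element replaced per row).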
template <typename xpu, typename OP>
void EvalMatFillRowElem_(const TBlob& lhs,
const TBlob& mhs,
const TBlob& rhs,
TBlob* ret,
RunContext ctx) {
using namespace mshadow::expr;
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
ret->get<xpu, 2, real_t>(s) = mat_fill_row_element(
lhs.get<xpu, 2, real_t>(s), mhs.get<xpu, 1, real_t>(s), rhs.get<xpu, 1, real_t>(s));
}
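// Elementwise tensor-scalar operation: ret = OP(scalar, lhs) when reverse is
// true, ret = OP(lhs, scalar) otherwise.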
template <typename xpu, typename OP, bool reverse>
void EvalScalar_(const TBlob& lhs, const real_t& rhs, TBlob* ret, RunContext ctx) {
using namespace mshadow::expr;
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
  CHECK_EQ(ret->type_flag_, lhs.type_flag_) << "Only supports input/output with the same data type";
if (reverse) {
MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
ret->FlatTo2D<xpu, DType>(s) =
F<typename OP::mshadow_op>(scalar(DType(rhs)), lhs.FlatTo2D<xpu, DType>(s));
});
} else {
MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
ret->FlatTo2D<xpu, DType>(s) =
F<typename OP::mshadow_op>(lhs.FlatTo2D<xpu, DType>(s), scalar(DType(rhs)));
});
}
}
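// Clamps every element of src into the interval [a_min, a_max].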
template <>
void EvalClip<DEVICE>(const TBlob& src,
const real_t& a_min,
const real_t& a_max,
TBlob* ret,
RunContext ctx) {
typedef DEVICE xpu;
using namespace mshadow::expr;
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
  CHECK_EQ(ret->type_flag_, src.type_flag_) << "Only supports input/output with the same data type";
MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
ret->FlatTo2D<xpu, DType>(s) = F<ClipMax::mshadow_op>(
F<ClipMin::mshadow_op>(src.FlatTo2D<xpu, DType>(s), scalar(DType(a_min))),
scalar(DType(a_max)));
});
}
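// Random samplers: each EvalRandom specialization fills ret with draws from
// one distribution; only float32 and float64 outputs are supported.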
template <>
void EvalRandom<DEVICE, UniformDistribution>(const real_t& a,
const real_t& b,
const Resource& resource,
TBlob* ret,
RunContext ctx) {
typedef DEVICE xpu;
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
switch (ret->type_flag_) {
case mshadow::kFloat32: {
mshadow::Random<xpu, float>* prnd = resource.get_random<xpu, float>(s);
mshadow::Tensor<xpu, 2, float> tmp = ret->FlatTo2D<xpu, float>(s);
prnd->SampleUniform(&tmp, float(a), float(b)); // NOLINT(*)
break;
}
case mshadow::kFloat64: {
mshadow::Random<xpu, double>* prnd = resource.get_random<xpu, double>(s);
mshadow::Tensor<xpu, 2, double> tmp = ret->FlatTo2D<xpu, double>(s);
prnd->SampleUniform(&tmp, double(a), double(b)); // NOLINT(*)
break;
}
default:
      LOG(FATAL) << "Random only supports float32 and float64";
}
}
template <>
void EvalRandom<DEVICE, GaussianDistribution>(const real_t& mu,
const real_t& sigma,
const Resource& resource,
TBlob* ret,
RunContext ctx) {
typedef DEVICE xpu;
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
switch (ret->type_flag_) {
case mshadow::kFloat32: {
mshadow::Random<xpu, float>* prnd = resource.get_random<xpu, float>(s);
mshadow::Tensor<xpu, 2, float> tmp = ret->FlatTo2D<xpu, float>(s);
prnd->SampleGaussian(&tmp, float(mu), float(sigma)); // NOLINT(*)
break;
}
case mshadow::kFloat64: {
mshadow::Random<xpu, double>* prnd = resource.get_random<xpu, double>(s);
mshadow::Tensor<xpu, 2, double> tmp = ret->FlatTo2D<xpu, double>(s);
prnd->SampleGaussian(&tmp, double(mu), double(sigma)); // NOLINT(*)
break;
}
default:
      LOG(FATAL) << "Random only supports float32 and float64";
}
}
#if defined(__CUDACC__)
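// The distributions below have no GPU sampler; their gpu specializations
// simply forward to the cpu implementations.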
template <>
void EvalRandom<gpu, GammaDistribution>(const real_t& alpha,
const real_t& beta,
const Resource& resource,
TBlob* ret,
RunContext ctx) {
EvalRandom<cpu, GammaDistribution>(alpha, beta, resource, ret, ctx);
}
template <>
void EvalRandom<gpu, ExponentialDistribution>(
const real_t& lambda,
    const real_t& dummy,  // unused; kept only to match the two-parameter SampleOp signature
const Resource& resource,
TBlob* ret,
RunContext ctx) {
EvalRandom<cpu, ExponentialDistribution>(lambda, dummy, resource, ret, ctx);
}
template <>
void EvalRandom<gpu, PoissonDistribution>(
const real_t& lambda,
    const real_t& dummy,  // unused; kept only to match the two-parameter SampleOp signature
const Resource& resource,
TBlob* ret,
RunContext ctx) {
EvalRandom<cpu, PoissonDistribution>(lambda, dummy, resource, ret, ctx);
}
template <>
void EvalRandom<gpu, NegBinomialDistribution>(const real_t& k,
const real_t& p,
const Resource& resource,
TBlob* ret,
RunContext ctx) {
EvalRandom<cpu, NegBinomialDistribution>(k, p, resource, ret, ctx);
}
template <>
void EvalRandom<gpu, GenNegBinomialDistribution>(const real_t& mu,
const real_t& alpha,
const Resource& resource,
TBlob* ret,
RunContext ctx) {
EvalRandom<cpu, GenNegBinomialDistribution>(mu, alpha, resource, ret, ctx);
}
#else
template <>
void EvalRandom<cpu, GammaDistribution>(const real_t& alpha,
const real_t& beta,
const Resource& resource,
TBlob* ret,
RunContext ctx) {
  typedef cpu xpu;  // No GPU support for this distribution.
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
switch (ret->type_flag_) {
case mshadow::kFloat32: {
mshadow::Random<xpu, float>* prnd = resource.get_random<xpu, float>(s);
mshadow::Tensor<xpu, 2, float> tmp = ret->FlatTo2D<xpu, float>(s);
prnd->SampleGamma(&tmp, float(alpha), float(beta)); // NOLINT(*)
break;
}
case mshadow::kFloat64: {
mshadow::Random<xpu, double>* prnd = resource.get_random<xpu, double>(s);
mshadow::Tensor<xpu, 2, double> tmp = ret->FlatTo2D<xpu, double>(s);
prnd->SampleGamma(&tmp, double(alpha), double(beta)); // NOLINT(*)
break;
}
default:
      LOG(FATAL) << "Random only supports float32 and float64";
}
}
template <>
void EvalRandom<cpu, ExponentialDistribution>(
const real_t& lambda,
    const real_t& dummy,  // unused; kept only to match the two-parameter SampleOp signature
const Resource& resource,
TBlob* ret,
RunContext ctx) {
  typedef cpu xpu;  // No GPU support for this distribution.
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
switch (ret->type_flag_) {
case mshadow::kFloat32: {
mshadow::Random<xpu, float>* prnd = resource.get_random<xpu, float>(s);
mshadow::Tensor<xpu, 2, float> tmp = ret->FlatTo2D<xpu, float>(s);
prnd->SampleExponential(&tmp, float(lambda)); // NOLINT(*)
break;
}
case mshadow::kFloat64: {
mshadow::Random<xpu, double>* prnd = resource.get_random<xpu, double>(s);
mshadow::Tensor<xpu, 2, double> tmp = ret->FlatTo2D<xpu, double>(s);
prnd->SampleExponential(&tmp, double(lambda)); // NOLINT(*)
break;
}
default:
      LOG(FATAL) << "Random only supports float32 and float64";
}
}
template <>
void EvalRandom<cpu, PoissonDistribution>(
const real_t& lambda,
    const real_t& dummy,  // unused; kept only to match the two-parameter SampleOp signature
const Resource& resource,
TBlob* ret,
RunContext ctx) {
  typedef cpu xpu;  // No GPU support for this distribution.
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
switch (ret->type_flag_) {
case mshadow::kFloat32: {
mshadow::Random<xpu, float>* prnd = resource.get_random<xpu, float>(s);
mshadow::Tensor<xpu, 2, float> tmp = ret->FlatTo2D<xpu, float>(s);
prnd->SamplePoisson(&tmp, float(lambda)); // NOLINT(*)
break;
}
case mshadow::kFloat64: {
mshadow::Random<xpu, double>* prnd = resource.get_random<xpu, double>(s);
mshadow::Tensor<xpu, 2, double> tmp = ret->FlatTo2D<xpu, double>(s);
prnd->SamplePoisson(&tmp, double(lambda)); // NOLINT(*)
break;
}
default:
      LOG(FATAL) << "Random only supports float32 and float64";
}
}
template <>
void EvalRandom<cpu, NegBinomialDistribution>(const real_t& k,
const real_t& p,
const Resource& resource,
TBlob* ret,
RunContext ctx) {
  typedef cpu xpu;  // No GPU support for this distribution.
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
switch (ret->type_flag_) {
case mshadow::kFloat32: {
mshadow::Random<xpu, float>* prnd = resource.get_random<xpu, float>(s);
mshadow::Tensor<xpu, 2, float> tmp = ret->FlatTo2D<xpu, float>(s);
prnd->SampleNegativeBinomial(&tmp, float(k), float(p)); // NOLINT(*)
break;
}
case mshadow::kFloat64: {
mshadow::Random<xpu, double>* prnd = resource.get_random<xpu, double>(s);
mshadow::Tensor<xpu, 2, double> tmp = ret->FlatTo2D<xpu, double>(s);
prnd->SampleNegativeBinomial(&tmp, double(k), double(p)); // NOLINT(*)
break;
}
default:
      LOG(FATAL) << "Random only supports float32 and float64";
}
}
template <>
void EvalRandom<cpu, GenNegBinomialDistribution>(const real_t& mu,
const real_t& alpha,
const Resource& resource,
TBlob* ret,
RunContext ctx) {
  typedef cpu xpu;  // No GPU support for this distribution.
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
switch (ret->type_flag_) {
case mshadow::kFloat32: {
mshadow::Random<xpu, float>* prnd = resource.get_random<xpu, float>(s);
mshadow::Tensor<xpu, 2, float> tmp = ret->FlatTo2D<xpu, float>(s);
prnd->SampleGeneralizedNegativeBinomial(&tmp, float(mu), float(alpha)); // NOLINT(*)
break;
}
case mshadow::kFloat64: {
mshadow::Random<xpu, double>* prnd = resource.get_random<xpu, double>(s);
mshadow::Tensor<xpu, 2, double> tmp = ret->FlatTo2D<xpu, double>(s);
prnd->SampleGeneralizedNegativeBinomial(&tmp, double(mu), double(alpha)); // NOLINT(*)
break;
}
default:
      LOG(FATAL) << "Random only supports float32 and float64";
}
}
#endif  // defined(__CUDACC__)
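// Fills every element of ret with the scalar value rhs.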
template <>
void Eval<DEVICE>(const real_t& rhs, TBlob* ret, RunContext ctx) {
mshadow::Stream<DEVICE>* s = ctx.get_stream<DEVICE>();
MSHADOW_TYPE_SWITCH_EXT_WITH_BOOL(
ret->type_flag_, DType, { ret->FlatTo2D<DEVICE, DType>(s) = DType(rhs); });
}
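// dst = source[0] + source[1] + ...; the 2-, 3-, and 4-input cases are fused
// into a single expression, while longer lists fall back to an accumulation loop.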
template <>
void ElementwiseSum<DEVICE>(const std::vector<TBlob> source, TBlob* dst, RunContext ctx) {
typedef DEVICE xpu;
using namespace mshadow;
using namespace mshadow::expr;
Stream<xpu>* s = ctx.get_stream<xpu>();
for (size_t i = 1; i < source.size(); ++i) {
    CHECK_EQ(source[i].type_flag_, dst->type_flag_)
        << "Only supports input/output with the same data type";
}
MSHADOW_TYPE_SWITCH(dst->type_flag_, DType, {
Tensor<xpu, 2, DType> out = dst->FlatTo2D<xpu, DType>(s);
switch (source.size()) {
case 2: {
Tensor<xpu, 2, DType> in_0 = source[0].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> in_1 = source[1].FlatTo2D<xpu, DType>(s);
out = in_0 + in_1;
break;
}
case 3: {
Tensor<xpu, 2, DType> in_0 = source[0].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> in_1 = source[1].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> in_2 = source[2].FlatTo2D<xpu, DType>(s);
out = in_0 + in_1 + in_2;
break;
}
case 4: {
Tensor<xpu, 2, DType> in_0 = source[0].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> in_1 = source[1].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> in_2 = source[2].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> in_3 = source[3].FlatTo2D<xpu, DType>(s);
out = in_0 + in_1 + in_2 + in_3;
break;
}
default: {
Tensor<xpu, 2, DType> in_0 = source[0].FlatTo2D<xpu, DType>(s);
out = F<op::mshadow_op::identity>(in_0);
for (size_t i = 1; i < source.size(); ++i) {
out += source[i].FlatTo2D<xpu, DType>(s);
}
break;
}
}
});
}
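// Broadcasts the 2-D src into the 3-D ret by inserting a new axis of length
// `size` (via mshadow::expr::broadcast_with_axis).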
template <>
void EvalBroadcast<DEVICE>(TBlob const& src, TBlob* ret, int size, RunContext ctx) {
typedef DEVICE xpu;
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
mshadow::Tensor<xpu, 3> out = ret->get<xpu, 3, real_t>(s);
mshadow::Tensor<xpu, 2> in = src.get<xpu, 2, real_t>(s);
out = mshadow::expr::broadcast_with_axis(in, 0, size);
}
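// Launches a flat elementwise kernel: (*out)[i] = OP(lhs[i], rhs[i]) for
// i in [0, lhs.Size()).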
template <typename OP, typename xpu>
void BinaryOpKernelLaunch(mshadow::Stream<xpu>* s, const TBlob& lhs, const TBlob& rhs, TBlob* out) {
using namespace op::mxnet_op;
using namespace mshadow;
MSHADOW_TYPE_SWITCH(out->type_flag_, DType, {
Kernel<op_with_req<OP, kWriteInplace>, xpu>::Launch(
s, lhs.Size(), out->dptr<DType>(), lhs.dptr<DType>(), rhs.dptr<DType>());
});
}
// Macro-generated specializations for the current DEVICE.
DECL_BINARY(DEVICE, MatChooseRowElem, EvalMatChooseRowElem_)
DECL_TERNARY(DEVICE, MatFillRowElem, EvalMatFillRowElem_)
DECL_BINARY(DEVICE, OneHotEncode, EvalOneHot_)
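// REVERSE = true: the scalar is the left operand (scalar OP tensor).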
DECL_SCALAR(DEVICE, Plus, EvalScalar_, true)
DECL_SCALAR(DEVICE, Minus, EvalScalar_, true)
DECL_SCALAR(DEVICE, Mul, EvalScalar_, true)
DECL_SCALAR(DEVICE, Div, EvalScalar_, true)
DECL_BINARY_LAUNCH(DEVICE, Plus)
DECL_BINARY_LAUNCH(DEVICE, Minus)
DECL_BINARY_LAUNCH(DEVICE, Mul)
DECL_BINARY_LAUNCH(DEVICE, Div)
// REVERSE = false: the scalar is the right operand (tensor OP scalar).
DECL_SCALAR(DEVICE, Plus, EvalScalar_, false)
DECL_SCALAR(DEVICE, Minus, EvalScalar_, false)
DECL_SCALAR(DEVICE, Mul, EvalScalar_, false)
DECL_SCALAR(DEVICE, Div, EvalScalar_, false)
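// Illustrative sketch only: the specializations generated above are reached
// through the Eval<> entry points declared in ndarray_function.h. Assuming the
// caller has prepared the blobs `lhs` and `ret` and a RunContext `ctx`:
//   Eval<cpu, Plus, false>(lhs, 2.0f, &ret, ctx);  // ret = lhs + 2
//   Eval<cpu, Plus, true>(lhs, 2.0f, &ret, ctx);   // ret = 2 + lhs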
} // namespace ndarray
} // namespace mxnet
#endif // MXNET_NDARRAY_NDARRAY_FUNCTION_INL_H_