/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SINGA_CORE_MATH_H_
#define SINGA_CORE_MATH_H_
#include <algorithm>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <type_traits>
#include <vector>
#include "singa/core/common.h"
#include "singa/core/tensor.h"
#include "singa/utils/logging.h"
namespace singa {
/// \file math.h Math functions for linear algebra, neural net and random
/// operations.
/// All functions have template arguments: DType for the data type and Lang for
/// the device programming language, e.g., lang::Cpp, lang::Cuda.
///
/// TODO(wangwei) Clean the functions to make the function APIs consistent:
/// 1. All function names should be like XxxYyy or XY, i.e., capitalize the
/// first letter.
/// 2. Order functions based on function name in alphabetical order.
/// 3. Function arguments order is [const basic type] [const Tensor] [mutable
/// Tensor].
/// 4. For function argument names, use 'num' for the total number of elements
/// in element-wise operations; use 'in1' and 'in2' for input Tensors; use
/// 'out' for the output Tensor or value, with exceptions for some functions,
/// e.g., Scale(const float alpha, const Tensor &in, Tensor* out);
/// for such cases, use x, v, alpha, etc. for scalar arguments.
/// For BLAS functions, follow the BLAS style for argument names.
/// Use 'M' and 'v' for matrix and vector tensors in functions involving both
/// matrices and vectors.
/// 5. For Tensor argument xxx, name its raw pointer as xxxPtr.
/// 6. Pass the 'cudaStream_t s' to every function in math_kernel.h
/// 7. Use size_t for the number of elements, rows or columns.
/// 8. Use the same name for the Tensor and Tensor level math functions.
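// Illustrative sketch only (not part of this header): a backend such as
// tensor_math_cpp.h specializes these templates for a concrete DType and Lang
// tag following the conventions above. E.g., assuming the Tensor accessors
// block() and Size() and the Block accessors data() and mutable_data() from
// singa/core/tensor.h and singa/core/common.h:
//
//   template <>
//   void Add<float, lang::Cpp>(const Tensor &in, const float x, Tensor *out,
//                              Context *ctx) {
//     const float *inPtr = static_cast<const float *>(in.block()->data());
//     float *outPtr = static_cast<float *>(out->block()->mutable_data());
//     for (size_t i = 0; i < in.Size(); i++) outPtr[i] = inPtr[i] + x;
//   }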
inline const std::string vec2str(const std::vector<int> &vec) {
  std::ostringstream vts;
  if (!vec.empty()) {
    // Convert all but the last element to avoid a trailing ","
    std::copy(vec.begin(), vec.end() - 1,
              std::ostream_iterator<int>(vts, ", "));
    // Append the last element without a delimiter
    vts << vec.back();
  }
  return vts.str();
}
inline const std::string vec2str(const std::vector<size_t> &vec) {
  std::ostringstream vts;
  if (!vec.empty()) {
    // Convert all but the last element to avoid a trailing ","
    std::copy(vec.begin(), vec.end() - 1,
              std::ostream_iterator<size_t>(vts, ", "));
    // Append the last element without a delimiter
    vts << vec.back();
  }
  return vts.str();
}
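// For example, vec2str(std::vector<int>{1, 2, 3}) returns "1, 2, 3".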
// **************************************
// Element-wise functions
// Cpp tensors support multi-dimensional broadcasting;
// Cuda supports unidirectional broadcasting,
// i.e., the lhs and the output have the same shape.
// **************************************
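// E.g., for Add(in1, in2, out): with the Cpp backend, in1 of shape (2, 3) and
// in2 of shape (3) broadcast to an out of shape (2, 3); with the Cuda backend,
// in1 (the lhs) and out must share the same shape.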
/// out[i] = |in[i]|
template <typename DType, typename Lang>
void Abs(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Abs Not Implemented";
}
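/// out[i] = erf(in[i]), the Gauss error function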
template <typename DType, typename Lang>
void Erf(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Erf Not Implemented";
}
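/// Cast each element of src from DTypeSrc to DTypeDst and copy it into dst.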
template <typename DTypeSrc, typename DTypeDst, typename Lang>
void CastCopy(const Tensor *src, Tensor *dst, Context *ctx) {
LOG(FATAL) << "CastCopy Not Implemented";
}
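/// out[i] = ceil(in[i])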
template <typename DType, typename Lang>
void Ceil(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Ceil Not Implemented";
}
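/// out[i] = floor(in[i])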
template <typename DType, typename Lang>
void Floor(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Floor Not Implemented";
}
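/// out[i] = round(in[i])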
template <typename DType, typename Lang>
void Round(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Round Not Implemented";
}
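/// out[i] = round(in[i]); the E suggests rounding halves to even.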
template <typename DType, typename Lang>
void RoundE(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Round Not Implemented";
}
/// out[i] = in[i] + x
template <typename DType, typename Lang>
void Add(const Tensor &in, const DType x, Tensor *out, Context *ctx) {
LOG(FATAL) << "Add Not Implemented";
}
/// out[i] = in1[i] + in2[i]
template <typename DType, typename Lang>
void Add(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) {
LOG(FATAL) << "Add-Pair Not Implemented";
}
/// Clamp every element into [low, high]
/// if in[i]>high, then out[i]=high; if in[i]<low, then out[i]=low.
template <typename DType, typename Lang>
void Clamp(const DType low, const DType high, const Tensor &in, Tensor *out,
Context *ctx) {
LOG(FATAL) << "Clamp Not Implemented";
}
/// out[i] = x / in[i]
template <typename DType, typename Lang>
void Div(const DType x, const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Div Not Implemented";
}
/// out[i] = in[i] / x
template <typename DType, typename Lang>
void Div(const Tensor &in, const DType x, Tensor *out, Context *ctx) {
CHECK_NE(x, 0.f);
EltwiseMult<DType, Lang>(in, DType(1) / x, out, ctx);
}
/// out[i] = in1[i] / in2[i]
template <typename DType, typename Lang>
void Div(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) {
LOG(FATAL) << "Div-Pair Not Implemented";
}
/// out[i] = in[i] * x
template <typename DType, typename Lang>
void EltwiseMult(const Tensor &in, const DType x, Tensor *out, Context *ctx) {
LOG(FATAL) << "EltwiseMult Not Implemented";
}
/// out[i] = in1[i] * in2[i]
template <typename DType, typename Lang>
void EltwiseMult(const Tensor &in1, const Tensor &in2, Tensor *out,
Context *ctx) {
LOG(FATAL) << "EltwiseMult-Pair Not Implemented";
}
/// out[i]=(in2[i]>0)?in1[i]:0.f
template <typename DType, typename Lang>
void ReLUBackward(const Tensor &in1, const Tensor &in2, Tensor *out,
Context *ctx) {
LOG(FATAL) << "ReLUBackward Not Implemented";
}
/// Natural exponential with base e (Euler's number): out[i] = exp(in[i]).
template <typename DType, typename Lang>
void Exp(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Exp Not Implemented";
}
/// out[i]=(in[i]<=x)?1.f:0.f
template <typename DType, typename Lang>
void LE(const Tensor &in, const DType x, Tensor *out, Context *ctx) {
LOG(FATAL) << "LE Not Implemented";
}
/// out[i]=(in1[i]<=in2[i])?1.f:0.f
template <typename DType, typename Lang>
void LE(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) {
LOG(FATAL) << "Tensor-Tensor LE Not Implemented";
}
/// Natural logarithm with base e (Euler's number): out[i] = log(in[i]).
template <typename DType, typename Lang>
void Log(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Log Not Implemented";
}
/// out[i]=(in[i]<x)?1.f:0.f
template <typename DType, typename Lang>
void LT(const Tensor &in, const DType x, Tensor *out, Context *ctx) {
LOG(FATAL) << "LT Not Implemented";
}
/// out[i]=(in1[i]<in2[i])?1.f:0.f
template <typename DType, typename Lang>
void LT(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) {
LOG(FATAL) << "Tensor-Tensor LT Not Implemented";
}
/// out[i]=(in[i]>=x)?1.f:0.f
template <typename DType, typename Lang>
void GE(const Tensor &in, const DType x, Tensor *out, Context *ctx) {
LOG(FATAL) << "GE Not Implemented";
}
/// out[i]=(in1[i]>=in2[i])?1.f:0.f
template <typename DType, typename Lang>
void GE(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) {
LOG(FATAL) << "Tensor-Tensor GE Not Implemented";
}
/// out[i]=(in[i]>x)?1.f:0.f
template <typename DType, typename Lang>
void GT(const Tensor &in, const DType x, Tensor *out, Context *ctx) {
LOG(FATAL) << "GT Not Implemented";
}
/// out[i]=(in[i]>in2[i])?1.f:0.f
template <typename DType, typename Lang>
void GT(const Tensor &in, const Tensor &in2, Tensor *out, Context *ctx) {
LOG(FATAL) << "Tensor-Tensor GT Not Implemented";
}
/// out[i]=(in[i]==x)?1.f:0.f
template <typename DType, typename Lang>
void EQ(const Tensor &in, const DType x, Tensor *out, Context *ctx) {
LOG(FATAL) << "EQ Not Implemented";
}
/// out[i]=(in[i]==in2[i])?1.f:0.f
template <typename DType, typename Lang>
void EQ(const Tensor &in, const Tensor &in2, Tensor *out, Context *ctx) {
LOG(FATAL) << "Tensor-Tensor EQ Not Implemented";
}
/// out[i] = pow(in[i], x)
template <typename DType, typename Lang>
void Pow(const Tensor &in, const DType x, Tensor *out, Context *ctx) {
LOG(FATAL) << "Pow Not Implemented";
}
/// out[i]=pow(in1[i], in2[i])
template <typename DType, typename Lang>
void Pow(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) {
LOG(FATAL) << "Pow-Pair Not Implemented";
}
/// out[i]=max(0, in[i])
template <typename DType, typename Lang>
void ReLU(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "ReLU Not Implemented";
}
/// out[i] = x
template <typename DType, typename Lang>
void Set(const DType x, Tensor *out, Context *ctx) {
LOG(FATAL) << "Set Not Implemented";
}
/// out[i]=sigmoid(in[i])
template <typename DType, typename Lang>
void Sigmoid(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Sigmoid Not Implemented";
}
/// out[i] = log(exp(in[i]) + 1)
template <typename DType, typename Lang>
void SoftPlus(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "SoftPlus Not Implemented";
}
/// out[i] = in[i] / (abs(in[i]) + 1)
template <typename DType, typename Lang>
void SoftSign(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "SoftSign Not Implemented";
}
/// out[i] = sign(in[i])
template <typename DType, typename Lang>
void Sign(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Sign Not Implemented";
}
/// out[i]=sqrt(in[i])
template <typename DType, typename Lang>
void Sqrt(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Sqrt Not Implemented";
}
/// out[i]=square(in[i])
template <typename DType, typename Lang>
void Square(const Tensor &in, Tensor *out, Context *ctx) {
EltwiseMult<DType, Lang>(in, in, out, ctx);
}
/// out[i] = in[i] - x
template <typename DType, typename Lang>
void Sub(const Tensor &in, const DType x, Tensor *out, Context *ctx) {
Add<DType, Lang>(in, -x, out, ctx);
}
/// out[i] = in1[i] - in2[i]
template <typename DType, typename Lang>
void Sub(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) {
LOG(FATAL) << "Sub-Pair Not Implemented";
}
/// sum all elements of in into out
template <typename DType, typename Lang>
void Sum(const Tensor &in, DType *out, Context *ctx) {
LOG(FATAL) << "Sum Not Implemented";
}
/// out[i]=fn(in[i])
#define GenUnaryNotImplemented(fn, stringfn) \
template <typename DType, typename Lang> \
void fn(const Tensor &in, Tensor *out, Context *ctx) { \
std::string str = stringfn; \
str += " Not Implemented"; \
LOG(FATAL) << str; \
}
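// E.g., GenUnaryNotImplemented(Cos, "Cos") below expands to a
// Cos<DType, Lang>(const Tensor &in, Tensor *out, Context *ctx) stub that
// aborts with "Cos Not Implemented" until a backend specializes it.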
GenUnaryNotImplemented(Cos, "Cos");
GenUnaryNotImplemented(Cosh, "Cosh");
GenUnaryNotImplemented(Acos, "Acos");
GenUnaryNotImplemented(Acosh, "Acosh");
GenUnaryNotImplemented(Sin, "Sin");
GenUnaryNotImplemented(Sinh, "Sinh");
GenUnaryNotImplemented(Asin, "Asin");
GenUnaryNotImplemented(Asinh, "Asinh");
GenUnaryNotImplemented(Tan, "Tan");
GenUnaryNotImplemented(Tanh, "Tanh");
GenUnaryNotImplemented(Atan, "Atan");
GenUnaryNotImplemented(Atanh, "Atanh");
/// Similar to cudnnTransformTensor.
/// Copies the data from one tensor to another tensor with a different layout;
/// the tensors must have the same dimensions but not necessarily the same
/// strides.
template <typename DType, typename Lang>
void Transform(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Transform Not Implemented";
}
// **************************************
// Random functions
// **************************************
/// Each element of out is 1 with probability p and 0 with probability 1 - p,
/// where 0 <= p <= 1.
// Get the random generator from 'ctx'
// If DType is not float, then convert the threshold to DType
template <typename DType, typename Lang>
void Bernoulli(const float p, Tensor *out, Context *ctx) {
LOG(FATAL) << "Bernoulli Not Implemented";
}
// The random generator should be extracted from ctx.
// If DType is not float, then convert the mean and std to DType
template <typename DType, typename Lang>
void Gaussian(const DType mean, const DType std, Tensor *out, Context *ctx) {
LOG(FATAL) << "Gaussian Not Implemented";
}
// The random generator should be extracted from ctx.
// If DType is not float, then convert the low and high to DType
template <typename DType, typename Lang>
void Uniform(const DType low, const DType high, Tensor *out, Context *ctx) {
LOG(FATAL) << "Uniform Not Implemented";
}
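// Illustrative sketch only: a Cpp backend could implement Bernoulli via the
// host random generator, assuming <random> is available and Context exposes
// the std::mt19937 random_generator declared in singa/core/common.h:
//
//   template <>
//   void Bernoulli<float, lang::Cpp>(const float p, Tensor *out,
//                                    Context *ctx) {
//     std::bernoulli_distribution distribution(p);
//     float *outPtr = static_cast<float *>(out->block()->mutable_data());
//     for (size_t i = 0; i < out->Size(); i++)
//       outPtr[i] = distribution(ctx->random_generator) ? 1.0f : 0.0f;
//   }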
// *********************************************************
// BLAS functions, ref to http://docs.nvidia.com/cuda/cublas
// *********************************************************
/// Return the index of the element with the maximal value.
template <typename DType, typename Lang>
void Amax(const Tensor &in, size_t *out, Context *ctx) {
LOG(FATAL) << "Amax Not Implemented";
}
/// Return the index of the element with the minimal value.
template <typename DType, typename Lang>
void Amin(const Tensor &in, size_t *out, Context *ctx) {
LOG(FATAL) << "Amin Not Implemented";
}
/// out = sum |x| for all x in in
template <typename DType, typename Lang>
void Asum(const Tensor &in, DType *out, Context *ctx) {
LOG(FATAL) << "Asum Not Implemented";
}
/// out = alpha * in + out
template <typename DType, typename Lang>
void Axpy(const DType alpha, const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Axpy Not Implemented";
}
/// out = alpha * in + out
template <typename DType, typename Lang>
void Axpy(const Tensor &alpha, const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Axpy Not Implemented";
}
/// out = ||in||_2, i.e., the L2 norm of in.
template <typename DType, typename Lang>
void Nrm2(const Tensor &in, float *out, Context *ctx) {
LOG(FATAL) << "Nrm2 Not Implemented";
}
/// out *= x
template <typename DType, typename Lang>
void Scale(const DType x, Tensor *out, Context *ctx) {
LOG(FATAL) << "Scale Not Implemented";
}
/// inner product of array in1 and in2
template <typename DType, typename Lang>
void Dot(const Tensor &in1, const Tensor &in2, DType *out, Context *ctx) {
LOG(FATAL) << "Dot Not Implemented";
}
template <typename DType, typename Lang>
void Dot(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) {
LOG(FATAL) << "Dot Not Implemented";
}
/// out = alpha * A * v + beta * out.
/// Whether A is transposed is determined from A's internal data layout.
template <typename DType, typename Lang>
void GEMV(const DType alpha, const Tensor &A, const Tensor &v, const DType beta,
Tensor *out, Context *ctx) {
LOG(FATAL) << "GEMV Not Implemented";
}
/// Multiply matrix M with a diagonal matrix constructed using the values from 'v'.
/// If side_right is true, compute M * diag(v); otherwise compute diag(v) * M.
template <typename DType, typename Lang>
void DGMM(const bool side_right, const Tensor &M, const Tensor &v, Tensor *out,
Context *ctx) {
LOG(FATAL) << "DGMM Not Implemented";
}
/// C = alpha * A * B + beta * C.
/// Whether A and B are transposed is determined from their internal data layouts.
template <typename DType, typename Lang>
void GEMM(const DType alpha, const Tensor &A, const Tensor &B, const DType beta,
Tensor *C, Context *ctx) {
LOG(FATAL) << "GEMM Not Implemented";
}
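// For example, with A of shape (m, k) and B of shape (k, n), C must be a
// pre-allocated (m, n) tensor; alpha = 1 and beta = 0 yield the plain matrix
// product C = A * B.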
template <typename DType, typename Lang>
void GEMMBatched(const DType alpha, const Tensor &A, const Tensor &B,
const DType beta, Tensor *C, Context *ctx) {
LOG(FATAL) << "GEMM Batched Not Implemented";
}
template <typename DType, typename Lang>
void SoftMax(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Not Implemented";
}
template <typename DType, typename Lang>
void SoftMaxBackward(const Tensor &in, Tensor *out, const Tensor &fdout,
Context *ctx) {
LOG(FATAL) << "Not Implemented";
}
// yisen todo
template <typename DType, typename Lang>
void ComputeCrossEntropy(bool int_target, const size_t batchsize,
const size_t dim, const Tensor &p, const Tensor &t,
Tensor *loss, Context *ctx) {
LOG(FATAL) << "Not Implemented";
}
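// For int targets, the per-sample loss is the usual cross entropy
// loss[i] = -log(p[i][t[i]]), with p of shape (batchsize, dim) and t holding
// one target index per sample.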
template <typename DType, typename Lang>
void SoftmaxCrossEntropyBwd(bool int_target, const size_t batchsize,
const size_t dim, const Tensor &p, const Tensor &t,
Tensor *grad, Context *ctx) {
LOG(FATAL) << "Not Implemented";
}
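// For int targets, the usual softmax cross-entropy gradient is
// grad[i][j] = p[i][j] - 1{j == t[i]}, i.e., the probabilities with 1
// subtracted at each target index.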
template <typename DType, typename Lang>
void RowMax(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Not Implemented";
}
// **************************************
// Matrix functions
// **************************************
/*
/// Add the vector v to every column of A as the column of out
template <typename DType, typename Lang>
void AddCol(const size_t nrow, const size_t ncol, const Tensor &A,
            const Tensor &v, Tensor *out, Context *ctx) {
LOG(FATAL) << "AddCol Not Implemented";
}
// TODO(wangwei) unify AddRow and AddCol.
/// Add the vector v to every row of A as the row of out
template <typename DType, typename Lang>
void AddRow(const size_t nrow, const size_t ncol, const Tensor &A,
            const Tensor &v, Tensor *out, Context *ctx) {
LOG(FATAL) << "AddRow Not Implemented";
}
/// outer-product.
/// in1 and in2 are vectors of len m and n. out is matrix of shape m * n
template <typename DType, typename Lang>
void Outer(const size_t m, const size_t n, const Tensor &in1, const Tensor &in2,
Tensor *out, Context *ctx) {
LOG(FATAL) << "Outer Not Implemented";
}
/// Sum the columns of the in matrix into a vector
template <typename DType, typename Lang>
void SumColumns(const size_t nrow, const size_t ncol, const Tensor &in,
                Tensor *out, Context *ctx) {
LOG(FATAL) << "SumColumns Not Implemented";
}
template <typename DType, typename Lang>
void Set(const DType x, Tensor *out, Context *ctx) {
LOG(FATAL) << "Not Implemented";
}
// TODO(wangwei) unify SumRow and SumCol.
/// Sum the rows of the in matrix into a vector
template <typename DType, typename Lang>
void SumRows(const size_t nrow, const size_t ncol, const Tensor &in,
             Tensor *out, Context *ctx) {
LOG(FATAL) << "SumRows Not Implemented";
}
*/
} // namespace singa
#endif // SINGA_CORE_MATH_H_