* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
#include <vector>
#include <tuple>
#include <memory>
#include "singa/core/common.h"
#include "singa/core/device.h"
#include "singa/proto/core.pb.h"
#include "singa/utils/logging.h"
using std::vector;
using std::tuple;
namespace singa {
/// Dimension sizes of a Tensor; Tensor::nDim() is the number of entries.
using Shape = vector<size_t>;
/// Width in bytes of each type defined in DataType, indexed by the enum value.
/// NOTE(review): the per-entry type names below are inferred from the widths
/// and from the data_type() comment; verify the order against the DataType
/// enum in core.proto.
const size_t kDataWidth[] = {
    sizeof(float),          // presumably kFloat32
    sizeof(float) / 2,      // presumably kFloat16: half of a float
    sizeof(int),            // presumably kInt
    sizeof(char),           // presumably kChar
    sizeof(double),         // presumably kDouble
    sizeof(unsigned char),  // presumably kUChar
};
/// Return the width in bytes of one element of data type 't', looked up in
/// kDataWidth.
inline size_t SizeOf(DataType t) {
  // Compile-time guard: the width table needs exactly one entry per DataType.
  static_assert(kNumDataType == sizeof(kDataWidth) / sizeof(kDataWidth[0]),
                "Num of data types not match num of data width");
  // Runtime guard against indexing past the end of the table.
  CHECK_GT(kNumDataType, t);
  const size_t width = kDataWidth[t];
  return width;
}
/// A Tensor instance is a multi-dimensional array resident on a Device
/// (default device is the host CPU). The internal data is allocated in lazy
/// manner.
/// Linear algebra, neural net and random operations are provided against
/// Tensor.
/// For all operations, if the result tensor is passed as an argument,
/// then it must be set up correctly (shape, device). Otherwise, runtime error
/// like SegmentFault would happen. Simply type/device check would be conducted.
class Tensor {
explicit Tensor(Shape &&shape, DataType dtype = kFloat32);
explicit Tensor(const Shape &shape, DataType dtype = kFloat32);
Tensor(Shape &&shape, std::shared_ptr<Device> dev, DataType dtype = kFloat32);
Tensor(const Shape &shape, std::shared_ptr<Device> dev,
DataType dtype = kFloat32);
/// Copy Tensor to share the internal data. No deep copy.
Tensor(const Tensor &from);
/// Copy Tensor to share the internal data. No deep copy.
Tensor(Tensor &&from);
/// For functions in to access the block.
/// Users should not operate against Block directly.
/// block_ is allocated in constructors.
Block *block() const { return block_; }
void SetBlock(Block *block);
std::shared_ptr<Device> device() const { return device_; }
/// return immutable Tensor values with given type.
template <typename SType>
const SType *data() const {
return static_cast<const SType *>(block()->data());
/// used for swig code to convert Tensor into numpy array.
/// It gets data into 'value'
template <typename SType>
void GetValue(SType *value, const size_t num) {
CHECK(device_ == defaultDevice);
const SType* ptr = data<SType>();
for(size_t i = 0; i < num; i++) value[i] = ptr[i];
/// data type, including kFloat16, kFloat32, kInt
const DataType data_type() const { return data_type_; }
const Shape &shape() const { return shape_; }
const size_t shape(const size_t idx) const {
CHECK_LT(idx, shape_.size());
size_t nDim() const { return shape_.size(); }
bool transpose() const { return transpose_; }
/// return number of total elements
size_t Size() const {
if (block_ == nullptr) return 0u;
CHECK_EQ(block_->size() % SizeOf(data_type_), 0u);
return block_->size() / SizeOf(data_type_);
/// return memory size (i.e., Bytes)
size_t MemSize() const { return block_->size(); }
/// Reset the tensor shape, it may reallocate block, if MemSize() changes.
void Reshape(const Shape &shape);
void Reshape(Shape &&shape);
/// Reset the shape, device, and data type as given tensor.
/// If block size changes, then reallocate a new block. The previous block
/// would
/// be deleted.
void ResetLike(const Tensor &t);
/// Reset the data type, it would reallocate block if type changes.
void AsType(const DataType type);
/// Reset the device.
/// If the target device is a diff device, then do deep data copy.
void ToDevice(std::shared_ptr<Device> dev);
/// Equivalent to ToDevice(host_dev).
void ToHost();
/// Set each element of the tensor to be x
template <typename SType>
void SetValue(const SType x);
/// For init the tensor values, copy 'num' elements from 'src' to the internal
/// memory with 'offset' (elements).
template <typename SType>
void CopyDataFromHostPtr(const SType *src, const size_t num,
const size_t offset = 0);
/// Copy data from another Tensor which may be on a diff device.
/// Meta data would not be copied!
void CopyData(const Tensor &other);
/// Deserialize data, shape and transpose from protobuf object.
void FromProto(const singa::TensorProto &proto);
/// Serialize data, shape and transpose to protobuf object.
void ToProto(singa::TensorProto *proto) const;
/// return an exactly the same Tensor with data been deep copied to the given
/// device. If 'device' is nullptr, then clone it one the current device.
Tensor Clone(std::shared_ptr<Device> device = nullptr) const;
// Tensor operations
/// Matrix transpose. Valid only if shape.size() == 2.
/// No data copy, just set the transpose_ filed of the returned tensor.
Tensor T() const;
/// Copy the meta info with data block shared.
Tensor &operator=(const Tensor &in);
/// Copy the meta info with data block shared.
Tensor &operator=(Tensor &&in);
Tensor &operator+=(const Tensor &in);
// void operator+=(Tensor&& in);
Tensor &operator-=(const Tensor &in);
// void operator-=(Tensor&& in);
Tensor &operator*=(const Tensor &in);
// void operator*=(Tensor&& in);
Tensor &operator/=(const Tensor &in);
// void operator/=(Tensor&& in);
// Scalar operations.
/// SType is a scalar type
template <typename SType>
Tensor &operator+=(const SType x);
/// SType is a scalar type
template <typename SType>
Tensor &operator-=(const SType x);
/// SType is a scalar type
template <typename SType>
Tensor &operator*=(const SType x);
/// SType is a scalar type
template <typename SType>
Tensor &operator/=(const SType x);
/// Return average L1 norm
float L1() const;
/// Return average L2 norm
float L2() const;
bool transpose_ = false;
DataType data_type_ = kFloat32;
std::shared_ptr<Device> device_ = nullptr;
/// Note: block_ is allocated in lazy manner to avoid frequent malloc/free.
/// If you want to get an allocated Block, use block() instead of block_.
Block *block_ = nullptr;
Shape shape_ = {};
/// Mutable iterator over the dimension sizes stored in a Shape.
using ShapeIter = Shape::iterator;
/// Multiply the dimension sizes shape[start], ..., shape[len - 1].
///
/// Despite its name, 'len' acts as an exclusive end index, not a count:
/// the loop runs while the index is smaller than 'len'.
/// @param shape dimension sizes.
/// @param start index of the first dimension to include.
/// @param len   exclusive end index; 0 (the default) means shape.size().
/// @return the product; 0 for an empty shape; 1 when 'start' is not smaller
///         than the end index.
inline size_t Product(const Shape &shape, int start = 0, size_t len = 0) {
  if (len == 0) {
    // 0 is the "not given" sentinel: run through the last dimension.
    len = shape.size();
    if (len == 0) return 0;  // empty shape
  }
  CHECK_LE(len, shape.size());
  size_t result = 1;
  unsigned int dim = start;
  while (dim < len) {
    result *= shape[dim];
    ++dim;
  }
  return result;
}
inline void CheckDataTypeAndLang(const Tensor &in1, const Tensor &in2) {
CHECK_EQ(in1.data_type(), in2.data_type());
CHECK_EQ(in1.device()->lang(), in2.device()->lang());
/// Convert 'x' from FromType to ToType with a plain static_cast.
/// Both template arguments must be given explicitly in that order, e.g.
/// TypeCast<float, int>(x).
template <typename FromType, typename ToType>
ToType TypeCast(const FromType &x) {
  // TODO(wangwei) cast fp16; prevent some casts, e.g., float to char
  const ToType converted = static_cast<ToType>(x);
  return converted;
}
/// Free-function form of Reshape: 'in' is const, so the tensor is returned
/// instead of being modified; presumably the result has shape 's'.
/// NOTE(review): whether the result shares its block with 'in' is not visible
/// from this header — confirm in the implementation.
Tensor Reshape(const Tensor &in, const Shape &s);
/// Overload taking the target shape as an rvalue.
Tensor Reshape(const Tensor &in, Shape &&s);
// For tensors with sparse content, e.g., missing columns or rows.
// class SparseTensor : public Tensor {};
/// Copy 'num' elements of src to dst.
/// The first 'src_offset' ('dst_offset') elements will be skipped.
/// Both offsets default to 0.
void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
const size_t dst_offset = 0, const size_t src_offset = 0);
// =============Element-wise operations====================================
/// Unary element-wise functions returning the result as a Tensor.
/// NOTE(review): the math of each function is implied by its name only; the
/// definitions are not in this header.
Tensor Abs(const Tensor &in);
Tensor Exp(const Tensor &in);
Tensor Log(const Tensor &in);
Tensor ReLU(const Tensor &in);
Tensor Sigmoid(const Tensor &in);
Tensor Sign(const Tensor &in);
Tensor Sqrt(const Tensor &in);
Tensor Square(const Tensor &in);
Tensor Tanh(const Tensor &in);
/// Overloads of the functions above that take the result tensor 'out' as an
/// argument. As stated in the Tensor class comment, a result tensor passed as
/// an argument must be set up correctly (shape, device) by the caller.
void Abs(const Tensor &in, Tensor *out);
void Exp(const Tensor &in, Tensor *out);
void Log(const Tensor &in, Tensor *out);
void ReLU(const Tensor &in, Tensor *out);
void Sigmoid(const Tensor &in, Tensor *out);
void Sign(const Tensor &in, Tensor *out);
void Sqrt(const Tensor &in, Tensor *out);
void Square(const Tensor &in, Tensor *out);
void Tanh(const Tensor &in, Tensor *out);
/// Element-wise operation, out[i]=in[i]^x; the result is returned.
template <typename SType>
Tensor Pow(const Tensor &in, const SType x);
/// Element-wise operation, out[i]=in[i]^x; the result goes into 'out'.
template <typename SType>
void Pow(const Tensor &in, const SType x, Tensor *out);
/// Element-wise operation, out[i]=base[i]^exp[i]; the result is returned.
Tensor Pow(const Tensor &base, const Tensor &exp);
/// Element-wise operation, out[i]=base[i]^exp[i]; the result goes into 'out'.
void Pow(const Tensor &base, const Tensor &exp, Tensor *out);
// Comparison operations. Each operator returns the result tensor; the named
// function declared right after it (LT/LE/GT/GE) takes the result tensor
// 'out' as an argument and is presumably the same comparison — confirm in the
// implementation.
/// Element-wise operation, out[i]= (in[i] < x) ? 1.f : 0.f
template <typename SType>
Tensor operator<(const Tensor &in, const SType x);
template <typename SType>
void LT(const Tensor &in, const SType x, Tensor *out);
/// Element-wise operation, out[i]= (in1[i] < in2[i]) ? 1.f : 0.f
Tensor operator<(const Tensor &in1, const Tensor& in2);
void LT(const Tensor &in1, const Tensor& in2, Tensor *out);
/// Element-wise operation, out[i]= (in[i] <= x) ? 1.f : 0.f
template <typename SType>
Tensor operator<=(const Tensor &in, const SType x);
template <typename SType>
void LE(const Tensor &in, const SType x, Tensor *out);
/// Element-wise operation, out[i]= (in1[i] <= in2[i]) ? 1.f : 0.f
Tensor operator<=(const Tensor &in1, const Tensor& in2);
void LE(const Tensor &in1, const Tensor& in2, Tensor *out);
/// Element-wise operation, out[i]= (in[i] > x) ? 1.f : 0.f
template <typename SType>
Tensor operator>(const Tensor &in, const SType x);
template <typename SType>
void GT(const Tensor &in, const SType x, Tensor *out);
/// Element-wise operation, out[i]= (in1[i] > in2[i]) ? 1.f : 0.f
Tensor operator>(const Tensor &in1, const Tensor& in2);
void GT(const Tensor &in1, const Tensor& in2, Tensor *out);
/// Element-wise operation, out[i]= (in[i] >= x) ? 1.f : 0.f
template <typename SType>
Tensor operator>=(const Tensor &in, const SType x);
template <typename SType>
void GE(const Tensor &in, const SType x, Tensor *out);
/// Element-wise operation, out[i]= (in1[i] >= in2[i]) ? 1.f : 0.f
Tensor operator>=(const Tensor &in1, const Tensor& in2);
void GE(const Tensor &in1, const Tensor& in2, Tensor *out);
// Binary arithmetic between two tensors. Each operator returns the result;
// the named function after it (Add/Sub/EltwiseMult/Div) takes the result
// tensor 'out' as an argument.
// NOTE(review): element-wise semantics are implied by the section heading and
// the function names; the definitions are not in this header.
Tensor operator+(const Tensor &lhs, const Tensor &rhs);
void Add(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator-(const Tensor &lhs, const Tensor &rhs);
void Sub(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator*(const Tensor &lhs, const Tensor &rhs);
void EltwiseMult(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator/(const Tensor &lhs, const Tensor &rhs);
void Div(const Tensor &lhs, const Tensor &rhs, Tensor *out);
// Arithmetic between a tensor 'in' and a scalar 'x' of type SType, with the
// same operator / named-function pairing as above.
template <typename SType>
Tensor operator+(const Tensor &in, const SType x);
template <typename SType>
void Add(const Tensor &in, const SType x, Tensor *out);
template <typename SType>
Tensor operator-(const Tensor &in, const SType x);
template <typename SType>
void Sub(const Tensor &in, const SType x, Tensor *out);
template <typename SType>
Tensor operator*(const Tensor &in, const SType x);
template <typename SType>
void EltwiseMult(const Tensor &in, const SType x, Tensor *out);
/// For each element e of Tensor 'in', compute e / x
template <typename SType>
Tensor operator/(const Tensor &in, const SType x);
/// For each element e of Tensor 'in', compute e / x into out
template <typename SType>
void Div(const Tensor &in, const SType x, Tensor *out);
/// For each element e of Tensor 'in', compute x/e
template <typename SType>
Tensor Div(const SType x, const Tensor &in);
/// For each element e of Tensor 'in', compute x/e into 'out'
template <typename SType>
void Div(const SType x, const Tensor &in, Tensor *out);
/// Reduce 'in' to a single value of type SType (float unless specified);
/// presumably the sum over all elements — confirm in the implementation.
template <typename SType = float>
SType Sum(const Tensor &in);
// ============Matrix (row/column) operations==================================
// For the void functions below, 'v' is const and nothing is returned, so the
// result is written through the Tensor pointer argument ('M' or 'out').
/// Average elements in the Tensor, currently only support vector and matrix.
/// if 'axis' is 0, average all rows into a single row
/// if 'axis' is 1, average all columns into a single column
/// TODO(wangwei) support arbitrary Tensor like numpy.average
Tensor Average(const Tensor &in, const int axis);
/// Add column 'v' to each column of matrix M
void AddColumn(const Tensor &v, Tensor *M);
/// For each column 'c' of matrix out, do c=alpha*v + beta*c
template <typename SType>
void AddColumn(const SType alpha, const SType beta, const Tensor &v,
Tensor *out);
/// Add row 'v' to each row of the matrix 'out' (the only matrix argument)
void AddRow(const Tensor &v, Tensor *out);
/// For each row 'r' of matrix M, do r=alpha*v + beta*r
template <typename SType>
void AddRow(const SType alpha, const SType beta, const Tensor &v, Tensor *M);
/// Column-wise division involving column 'v' and each column of matrix M.
/// NOTE(review): the original wording ("divide column 'v' by each column of
/// M") leaves the direction (M/v or v/M) ambiguous — confirm.
void DivColumn(const Tensor &v, Tensor *M);
/// Row-wise division involving row 'v' and each row of matrix M.
/// NOTE(review): direction (M/v or v/M) is ambiguous in the original wording
/// — confirm.
void DivRow(const Tensor &v, Tensor *M);
/// Multiply column 'v' and each column of matrix M
void MultColumn(const Tensor &v, Tensor *M);
/// Multiply row 'v' with each row of matrix M
void MultRow(const Tensor &v, Tensor *M);
/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
Tensor SoftMax(const Tensor &in);
/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
/// The result goes into 'out'.
void SoftMax(const Tensor &in, Tensor *out);
/// Column-wise subtraction involving column 'v' and each column of matrix M.
/// NOTE(review): direction (M-v or v-M) is ambiguous in the original wording
/// — confirm.
void SubColumn(const Tensor &v, Tensor *M);
/// Row-wise subtraction involving row 'v' and each row of matrix M.
/// NOTE(review): direction (M-v or v-M) is ambiguous in the original wording
/// — confirm.
void SubRow(const Tensor &v, Tensor *M);
/// Sum all columns of matrix M into a single column as 'out'
void SumColumns(const Tensor &M, Tensor *out);
/// Sum all rows of matrix M into a single row as 'out'
void SumRows(const Tensor &M, Tensor *out);
/// Sum elements in the Tensor, currently only support vector and matrix.
/// if 'axis' is 0, sum all rows into a single row
/// if 'axis' is 1, sum all columns into a single column
/// TODO(wangwei) support arbitrary Tensor like numpy.sum
Tensor Sum(const Tensor &in, const int axis);
// ================Random operations==========================================
/// For each element x of 'out', set x = 1 if random() < p; otherwise x = 0.
/// NOTE(review): the original comment said "otherwise x = 1", which would
/// make both branches identical; 0 is assumed — confirm against the
/// implementation.
template <typename SType>
void Bernoulli(const SType p, Tensor *out);
/// Fill in Tensor 'out' following Gaussian distribution with the given
/// 'mean' and 'std'.
template <typename SType>
void Gaussian(const SType mean, const SType std, Tensor *out);
/// Fill in Tensor 'out' following uniform distribution between 'low' and
/// 'high'.
/// NOTE(review): whether the bounds are inclusive is not visible here.
template <typename SType>
void Uniform(const SType low, const SType high, Tensor *out);
// ================Blas operations============================================
// TODO(wangwei) make amax/amin/asum a member function of tensor
/// out = alpha*in + out
template <typename SType>
void Axpy(SType alpha, const Tensor &in, Tensor *out);
/// Do matrix vector multiplication or matrix matrix multiplication depending
/// on the Tensor shape. result = A * B
Tensor Mult(const Tensor &A, const Tensor &B);
/// Do matrix vector multiplication or matrix matrix multiplication depending
/// on the Tensor shape. C = A * B
void Mult(const Tensor &A, const Tensor &B, Tensor *C);
/// Do matrix vector multiplication or matrix matrix multiplication depending
/// on the Tensor shape. C = alpha * A * B + beta * C
/// (the original comment wrote this formula with lhs/rhs/out for A/B/C).
template <typename SType>
void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
Tensor *C);
// *****************
// Misc.
// ****************
/// Compute the cross entropy loss given the prediction probability 'p' and
/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vector
/// or 2-d matrix. 'loss' is 1-d vector. 'p' is const here, so the result is
/// written to 'loss' (the original comment said "into p").
void ComputeCrossEntropy(const Tensor &p, const Tensor &t, Tensor *loss);
/// Compute the dx, given prediction probability 'p' (p=softmax(x)) and
/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vector
/// or 2-d matrix. dx is computed into p, i.e. 'p' is overwritten. (The
/// original comment also mentioned a 'grad' with the same shape as 'p';
/// there is no such parameter.)
void SoftmaxCrossEntropyBwd(const Tensor &t, Tensor *p);
/// Return a tensor consisting of rows ([start, end)) from 'in'. It shares the
/// memory with 'in'. 'in' is a 1D or 2D Tensor.
Tensor SliceRows(const Tensor &in, const size_t start, const size_t end);
/// Return a tensor consisting of rows ([start, end)) from 'in'. It copies the
/// values from 'in'. 'in' is a 2D Tensor.
Tensor CopyRows(const Tensor &in, const size_t start, const size_t end);
/// Return a tensor consisting of columns ([start, end)) from 'in'. It copies
/// the values from 'in'. 'in' is a 2D Tensor.
Tensor CopyColumns(const Tensor &in, const size_t start, const size_t end);
/// Return a tensor which is vertically stacked from tensors in 'in'. Each
/// tensor in 'in' is a 2D tensor. Values are copied, no memory sharing.
Tensor ConcatenateRows(const vector<Tensor> &in);
/// Return a tensor which is horizontally stacked from tensors in 'in'. Each
/// tensor in 'in' is a 2D tensor. Values are copied, no memory sharing.
Tensor ConcatenateColumns(const vector<Tensor> &in);
} // namespace singa