blob: ed37cdbc301f24115834d25bdaa6136079cc25d0 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SINGA_CORE_TENSOR_H_
#define SINGA_CORE_TENSOR_H_
#include <vector>
#include <tuple>
#include <memory>
#include "singa/core/common.h"
#include "singa/core/device.h"
#include "singa/proto/core.pb.h"
#include "singa/utils/logging.h"
using std::vector;
using std::tuple;
namespace singa {
typedef vector<size_t> Shape;
/// Hard-coded width, in bytes, of each element type defined in DataType.
/// SizeOf() indexes this table directly with a DataType value, so the entries
/// must stay in the same order as the enum and there must be exactly
/// kNumDataType of them (SizeOf() static_asserts the count).
/// NOTE(review): presumably the order is float32, float16, int, char, double,
/// unsigned char -- confirm against the DataType definition.
const size_t kDataWidth[] = {sizeof(float), sizeof(float) / 2,
sizeof(int), sizeof(char),
sizeof(double), sizeof(unsigned char)
};
/// Return the width in bytes of a single element of data type 't', as
/// recorded in kDataWidth.
inline size_t SizeOf(DataType t) {
  // Compile-time guard: the width table needs one entry per data type.
  static_assert(sizeof(kDataWidth) / sizeof(kDataWidth[0]) == kNumDataType,
                "Num of data types not match num of data width");
  // Runtime guard: 't' must be a valid index into the table.
  CHECK_GT(kNumDataType, t);
  const size_t width = kDataWidth[t];
  return width;
}
/// A Tensor instance is a multi-dimensional array resident on a Device
/// (default device is the host CPU). The internal data is allocated in lazy
/// manner.
/// Linear algebra, neural net and random operations are provided against
/// Tensor.
/// For all operations, if the result tensor is passed as an argument,
/// then it must be set up correctly (shape, device). Otherwise, runtime errors
/// such as segmentation faults may occur. Only simple type/device checks are
/// performed.
class Tensor {
 public:
  ~Tensor();
  Tensor();
  /// Constructor using default device.
  explicit Tensor(const Shape &shape, DataType dtype = kFloat32);
  /// Constructor with shape, device and data type
  Tensor(const Shape &shape,
         std::shared_ptr<Device> dev,
         DataType dtype = kFloat32);
  /// Copy constructor. No deep copy.
  Tensor(const Tensor &from);
  /// Move constructor. No deep copy.
  Tensor(Tensor &&from);

  // --------------------------------------------------------------------------
  // ---Following methods return info of the class without making any changes--
  // --------------------------------------------------------------------------

  /// For functions in xx_math.cc to access the block.
  /// Users should not operate against Block directly.
  /// block_ is allocated in constructors.
  Block *block() const { return block_; }

  /// Return the device this tensor is bound to.
  std::shared_ptr<Device> device() const { return device_; }

  /// Return immutable Tensor values with given type.
  /// The block is dereferenced without a null check.
  template <typename SType>
  const SType *data() const {
    return static_cast<const SType *>(block()->data());
  }

  /// data type, including kFloat16, kFloat32, kInt
  const DataType data_type() const { return data_type_; }

  /// Return the full shape vector.
  const Shape &shape() const { return shape_; }

  /// Return the extent of dimension 'idx'; 'idx' must be less than nDim().
  const size_t shape(const size_t idx) const {
    CHECK_LT(idx, shape_.size());
    return shape_.at(idx);
  }

  /// Return the number of dimensions.
  size_t nDim() const { return shape_.size(); }

  /// Return true if the tensor has no dimensions.
  bool empty() const { return nDim() == 0; }

  /// Return true if the strides are not in non-increasing order, i.e. some
  /// positive stride is larger than the closest preceding positive stride.
  /// Strides equal to 0 (due to broadcasting) are skipped in the comparison.
  bool transpose() const {
    if (!stride_.empty()) {
      auto last = stride_.front();
      for (auto s : stride_) {
        if (s > last && last > 0)
          return true;
        if (s > 0)
          last = s;
      }
    }
    return false;
  }

  /// Return the per-dimension strides.
  const vector<int>& stride() const { return stride_; }

  /// Return true if the content of the tensor is initialized.
  /// The misspelled name is kept as is because it is the public name.
  bool initailized() const {
    return block_ != nullptr && block_->initialized();
  }

  /// Return number of total elements (0 if no block is attached).
  size_t Size() const {
    if (block_ == nullptr) return 0u;
    CHECK_EQ(block_->size() % SizeOf(data_type_), 0u);
    return block_->size() / SizeOf(data_type_);
  }

  /// Return memory size (i.e., Bytes); 0 if no block is attached, which
  /// mirrors the null handling of Size().
  size_t MemSize() const {
    if (block_ == nullptr) return 0u;
    return block_->size();
  }

  /// used for swig code to convert Tensor into numpy array.
  /// It copies the first 'num' elements into 'value'; the tensor must be on
  /// defaultDevice.
  template <typename SType>
  void GetValue(SType *value, const size_t num) {
    CHECK(device_ == defaultDevice);
    const SType* ptr = data<SType>();
    for (size_t i = 0; i < num; i++) value[i] = ptr[i];
  }

  /// Serialize data, shape and transpose to protobuf object.
  void ToProto(singa::TensorProto *proto) const;
  /// Return average L1 norm
  float L1() const;
  /// Return average L2 norm
  float L2() const;

  // --------------------------------------------------------------------------
  // ---Following methods changes the internal data
  // --------------------------------------------------------------------------

  /// Set each element of the tensor to be x
  template <typename SType>
  void SetValue(const SType x);
  /// For init the tensor values, copy 'num' elements from 'src' to the internal
  /// memory with 'offset' (elements).
  template <typename SType>
  void CopyDataFromHostPtr(const SType *src, const size_t num,
                           const size_t offset = 0);
  /// Copy data from another Tensor which may be on a diff device.
  /// Meta data would not be copied!
  void CopyData(const Tensor &other);
  /// Deserialize data, shape and transpose from protobuf object.
  void FromProto(const singa::TensorProto &proto);
  /// TODO(wangwei) merge RepeatData into Repeat?
  void RepeatData(const vector<size_t>& repeats, int axis, int total_repeats,
                  const Tensor &other);

  // --------------------------------------------------------------------------
  // ---Following methods returns a new Tensor without change original tensor
  // --------------------------------------------------------------------------

  Tensor Repeat(const vector<size_t>& repeats, int axis,
                std::shared_ptr<Device> device = nullptr);
  /// Return an identical Tensor whose data is deep copied to the given
  /// device. If 'device' is nullptr, then clone it on the current device.
  Tensor Clone(std::shared_ptr<Device> device = nullptr) const;

  // --------------------------------------------------------------------------
  // ---Following methods change the tensor and return itself
  // --------------------------------------------------------------------------

  /// Copy assignment
  Tensor &operator=(const Tensor &in);
  /// Move assignment
  Tensor &operator=(Tensor &&in);
  Tensor &operator+=(const Tensor &in);
  Tensor &operator-=(const Tensor &in);
  Tensor &operator*=(const Tensor &in);
  Tensor &operator/=(const Tensor &in);
  // Scalar operations.
  /// SType is a scalar type
  template <typename SType>
  Tensor &operator+=(const SType x);
  /// SType is a scalar type
  template <typename SType>
  Tensor &operator-=(const SType x);
  /// SType is a scalar type
  template <typename SType>
  Tensor &operator*=(const SType x);
  /// SType is a scalar type
  template <typename SType>
  Tensor &operator/=(const SType x);
  /// change the shape (and stride); the block may be reallocated.
  Tensor &Reshape(const Shape &shape);
  /// Resize the memory and return itself
  Tensor& Resize(const Shape& shape);
  /// Matrix transpose. Valid only if shape.size() == 2.
  Tensor& T();
  /// Reverse the shape vector
  Tensor& Transpose();
  /// Change the axes
  Tensor& Transpose(const vector<size_t> &axes);
  /// Return a view of the input tensor whose shape is broadcasted to be
  /// compatible with the given shape
  Tensor& Broadcast(const Shape& shape);
  /// Reset the shape, device, and data type as given tensor.
  /// If block size changes, then reallocate a new block.
  /// The previous block would be deleted.
  Tensor& ResetLike(const Tensor &t);
  /// Reset the data type, it would reallocate block if type changes.
  Tensor& AsType(const DataType type);
  /// Reset the device.
  /// If the target device is a diff device, then do deep data copy.
  Tensor& ToDevice(std::shared_ptr<Device> dev);
  /// Equivalent to ToDevice(host_dev).
  Tensor& ToHost();

 protected:
  /// Generate strides automatically if stride field is not passed.
  /// For dimension n the stride is Size() divided by the product of
  /// shape_[0..n]; a tensor without dimensions gets the single stride 1.
  void generate_stride() {
    stride_.clear();
    if (shape_.empty()) {
      stride_.push_back(1);
      return;
    }
    const size_t total = Size();
    // Accumulate in size_t: a signed int accumulator overflows for large
    // shapes.
    size_t cumulative_product = 1;
    stride_.reserve(shape_.size());
    for (const size_t extent : shape_) {
      cumulative_product *= extent;
      // A zero-sized dimension makes the divisor 0; emit stride 0 instead of
      // dividing by zero.
      const size_t step =
          (cumulative_product == 0) ? 0 : total / cumulative_product;
      stride_.push_back(static_cast<int>(step));
    }
  }

  /// Replace the strides with 'new_strides'.
  void set_strides(const vector<int> &new_strides) {
    stride_ = new_strides;
  }

 protected:
  DataType data_type_ = kFloat32;
  std::shared_ptr<Device> device_ = nullptr;
  /// Note: block_ is allocated in lazy manner to avoid frequent malloc/free.
  /// If you want to get an allocated Block, use block() instead of block_.
  Block *block_ = nullptr;
  Shape shape_ = {};
  vector<int> stride_ = {};
}; //end of tensor class
inline size_t Product(const Shape &shape, int start = 0, size_t len = 0) {
if (len == 0) len = shape.size();
if (len == 0) return 0;
CHECK_LE(len, shape.size());
size_t v = 1;
for (unsigned int i = start; i < len; i++) v *= shape[i];
return v;
}
/// Check (via CHECK_EQ) that 'in1' and 'in2' have the same data type and that
/// their devices report the same lang().
/// NOTE(review): device() is dereferenced without a null check -- presumably
/// every Tensor passed here already has a device assigned; confirm against
/// the constructors and callers.
inline void CheckDataTypeAndLang(const Tensor &in1, const Tensor &in2) {
CHECK_EQ(in1.data_type(), in2.data_type());
CHECK_EQ(in1.device()->lang(), in2.device()->lang());
}
/// Convert 'x' from FromType to ToType with a plain static_cast.
/// ToType cannot be deduced from the argument, so callers spell out both
/// template arguments, e.g. TypeCast<float, int>(v).
template <typename FromType, typename ToType>
auto TypeCast(const FromType &x) -> ToType {
  // TODO(wangwei) cast fp16; prevent some casts, e.g., float to char
  return static_cast<ToType>(x);
}
Tensor Boradcast(const Shape& shape);
/// Reshape the given tensor into a new tensor of shape 's'; the total volume
/// (number of elements) must match. The result shares memory with 'in' if
/// possible.
Tensor Reshape(const Tensor &in, const Shape &s);
Tensor Resize(const Tensor &in, const Shape &s);
/// Reverse the shape vector
Tensor Transpose(const Tensor& in);
/// Return a view of the input tensor whose shape is broadcasted to be
/// compatible with the given shape
Tensor Broadcast(const Tensor& in, const Shape& shape);
/// Change the axes
Tensor Transpose(const Tensor& in, const vector<size_t> &axes);
/// Copy 'num' elements of src to dst.
/// The first 'src_offset' ('dst_offset') elements will be skipped.
void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
const size_t dst_offset = 0, const size_t src_offset = 0);
void RepeatDataToFrom(bool broadcast_flag, const vector<size_t>& repeats, int axis,
Tensor *dst, const Tensor &in, const size_t num);
// =============Element-wise operations====================================
Tensor Abs(const Tensor &in);
Tensor Exp(const Tensor &in);
Tensor Log(const Tensor &in);
Tensor ReLU(const Tensor &in);
Tensor Sigmoid(const Tensor &in);
Tensor Sign(const Tensor &in);
Tensor Sqrt(const Tensor &in);
Tensor Square(const Tensor &in);
Tensor Tanh(const Tensor &in);
Tensor Transform(const Tensor &in);
void Abs(const Tensor &in, Tensor *out);
void Exp(const Tensor &in, Tensor *out);
void Log(const Tensor &in, Tensor *out);
void ReLU(const Tensor &in, Tensor *out);
void Sigmoid(const Tensor &in, Tensor *out);
void Sign(const Tensor &in, Tensor *out);
void Sqrt(const Tensor &in, Tensor *out);
void Square(const Tensor &in, Tensor *out);
void Tanh(const Tensor &in, Tensor *out);
void Transform(const Tensor &in, Tensor *out);
/// Element-wise operation, out[i]=in[i]^x
template <typename SType>
Tensor Pow(const Tensor &in, const SType x);
/// Element-wise operation, out[i]=in[i]^x
template <typename SType>
void Pow(const Tensor &in, const SType x, Tensor *out);
/// Element-wise operation, out[i]=base[i]^exp[i]
Tensor Pow(const Tensor &base, const Tensor &exp);
/// Element-wise operation, out[i]=base[i]^exp[i]
void Pow(const Tensor &base, const Tensor &exp, Tensor *out);
/// Element-wise operation, out[i]= (in[i] < x) ? 1.f : 0.f
template <typename SType>
Tensor operator<(const Tensor &in, const SType x);
template <typename SType>
void LT(const Tensor &in, const SType x, Tensor *out);
/// Element-wise operation, out[i]= (in1[i] < in2[i]) ? 1.f : 0.f
Tensor operator<(const Tensor &in1, const Tensor& in2);
void LT(const Tensor &in1, const Tensor& in2, Tensor *out);
/// Element-wise operation, out[i]= (in[i] <= x) ? 1.f : 0.f
template <typename SType>
Tensor operator<=(const Tensor &in, const SType x);
template <typename SType>
void LE(const Tensor &in, const SType x, Tensor *out);
/// Element-wise operation, out[i]= (in1[i] <= in2[i]) ? 1.f : 0.f
Tensor operator<=(const Tensor &in1, const Tensor& in2);
void LE(const Tensor &in1, const Tensor& in2, Tensor *out);
/// Element-wise operation, out[i]= (in[i] > x) ? 1.f : 0.f
template <typename SType>
Tensor operator>(const Tensor &in, const SType x);
template <typename SType>
void GT(const Tensor &in, const SType x, Tensor *out);
/// Element-wise operation, out[i]= (in1[i] > in2[i]) ? 1.f : 0.f
Tensor operator>(const Tensor &in1, const Tensor& in2);
void GT(const Tensor &in1, const Tensor& in2, Tensor *out);
/// Element-wise operation, out[i]= (in[i] >= x) ? 1.f : 0.f
template <typename SType>
Tensor operator>=(const Tensor &in, const SType x);
template <typename SType>
void GE(const Tensor &in, const SType x, Tensor *out);
/// Element-wise operation, out[i]= (in1[i] >= in2[i]) ? 1.f : 0.f
Tensor operator>=(const Tensor &in1, const Tensor& in2);
void GE(const Tensor &in1, const Tensor& in2, Tensor *out);
Tensor operator+(const Tensor &lhs, const Tensor &rhs);
void Add(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator-(const Tensor &lhs, const Tensor &rhs);
void Sub(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator*(const Tensor &lhs, const Tensor &rhs);
void EltwiseMult(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator/(const Tensor &lhs, const Tensor &rhs);
void Div(const Tensor &lhs, const Tensor &rhs, Tensor *out);
template <typename SType>
Tensor operator+(const Tensor &in, const SType x);
template <typename SType>
void Add(const Tensor &in, const SType x, Tensor *out);
template <typename SType>
Tensor operator-(const Tensor &in, const SType x);
template <typename SType>
void Sub(const Tensor &in, const SType x, Tensor *out);
template <typename SType>
Tensor operator*(const Tensor &in, const SType x);
template <typename SType>
void EltwiseMult(const Tensor &in, const SType x, Tensor *out);
/// For each element e of Tensor 'in', compute e / x
template <typename SType>
Tensor operator/(const Tensor &in, const SType x);
/// For each element e of Tensor 'in', compute e / x into out
template <typename SType>
void Div(const Tensor &in, const SType x, Tensor *out);
/// For each element e of Tensor 'in', compute x/e
template <typename SType>
Tensor Div(const SType x, const Tensor &in);
/// For each element e of Tensor 'in', compute x/e into 'out'
template <typename SType>
void Div(const SType x, const Tensor &in, Tensor *out);
template <typename SType = float>
SType Sum(const Tensor &in);
// ============Matrix (row/column) operations==================================
/// Average elements in the Tensor, currently only support vector and matrix.
/// if 'axis' is 0, average all rows into a single row
/// if 'axis' is 1, average all columns into a single column
/// TODO(wangwei) support arbitrary Tensor like numpy.average
Tensor Average(const Tensor &in, const int axis);
/// Add column 'v' with each column of matrix M
void AddColumn(const Tensor &v, Tensor *M);
/// For each column 'c' of matrix out, do c=alpha*v + beta*c
template <typename SType>
void AddColumn(const SType alpha, const SType beta, const Tensor &v,
Tensor *out);
/// Add row 'v' to each row of matrix 'out'; results are written into 'out'
void AddRow(const Tensor &v, Tensor *out);
/// For each row 'r' of matrix out, do r=alpha*v + beta*r
template <typename SType>
void AddRow(const SType alpha, const SType beta, const Tensor &v, Tensor *M);
/// Divide column 'v' by each column of matrix M; write results into 'M'
void DivColumn(const Tensor &v, Tensor *M);
/// Divide row 'v' by each row of matrix M; write results into 'M'
void DivRow(const Tensor &v, Tensor *M);
/// Multiply column 'v' and each column of matrix M; write results into 'M'
void MultColumn(const Tensor &v, Tensor *M);
/// Multiply row 'v' with each row of matrix M; write results into 'M'
void MultRow(const Tensor &v, Tensor *M);
/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
Tensor SoftMax(const Tensor &in);
Tensor RowMax(const Tensor &in);
/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
void SoftMax(const Tensor &in, Tensor *out);
/// Sub column 'v' by each column of matrix M
void SubColumn(const Tensor &v, Tensor *M);
/// Sub row 'v' by each row of matrix M; write results into 'M'
void SubRow(const Tensor &v, Tensor *M);
/// Sum all columns of matrix M into a single column as 'out'
void SumColumns(const Tensor &M, Tensor *out);
/// Sum all rows of matrix M into a single row as 'out'
void SumRows(const Tensor &M, Tensor *out);
/// Sum elements in the Tensor, currently only support vector and matrix.
/// if 'axis' is 0, sum all rows into a single row
/// if 'axis' is 1, sum all columns into a single column
/// TODO(wangwei) support arbitrary Tensor like numpy.sum
Tensor Sum(const Tensor &in, const int axis);
// ================Random operations==========================================
/// For each element x set x = 1 if random() < p; otherwise x = 0.
template <typename SType>
void Bernoulli(const SType p, Tensor *out);
/// Fill in Tensor 'out' following Gaussian distribution.
template <typename SType>
void Gaussian(const SType mean, const SType std, Tensor *out);
/// Fill in Tensor 'out' following uniform distribution.
template <typename SType>
void Uniform(const SType low, const SType high, Tensor *out);
// ================Blas operations============================================
// TODO(wangwei) make amax/amin/asum a member function of tensor
/// out = alpha*in + out
template <typename SType>
void Axpy(SType alpha, const Tensor &in, Tensor *out);
/// Do matrix vector multiplication or matrix matrix multiplication depending
/// on the Tensor shape. result = A * B
Tensor Mult(const Tensor &A, const Tensor &B);
/// Do matrix vector multiplication or matrix matrix multiplication depending
/// on the Tensor shape. C = A * B
void Mult(const Tensor &A, const Tensor &B, Tensor *C);
/// Do matrix vector multiplication or matrix matrix multiplication depending
/// on the Tensor shape. C = alpha * A * B + beta * C
template <typename SType>
void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
Tensor *C);
// *****************
// Misc.
// ****************
/// Compute the cross entropy loss given the prediction probability 'p' and
/// the target (ground truth) labels 't'. 'p' could be either a 1-d vector for
/// a single instance or a 2-d matrix for a batch of instances. t[i]
/// could be the ground truth label index or a label weighted
/// array of the i-th instance. For example, if there are 3 candidate labels for
/// each instance, t[i] could be 2 or [0, 0, 1]. If one instance could have
/// multiple labels, then t[i] could be [1, 0, 1].
/// The loss is computed into 'loss'.
void ComputeCrossEntropy(const Tensor &p, const Tensor &t, Tensor *loss);
/// Compute the dx, given prediction probability 'p' (p=softmax(x)) and
/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vector
/// or 2-d matrix. 'grad' has the same shape as 'p'. dx is computed into p.
void SoftmaxCrossEntropyBwd(const Tensor &t, Tensor *p);
/// To be called by pysinga autograd operations;
/// swig ignores the const qualifier http://www.swig.org/Doc3.0/SWIGPlus.html#SWIGPlus_const
Tensor CrossEntropyFwd(const Tensor& p, const Tensor& t);
Tensor SoftmaxCrossEntropyBwd(const Tensor& p, const Tensor& t);
/// Return a tensor consisting of rows ([start, end)) from 'in'. It copies the
/// values from 'in'. 'in' is a 2D Tensor.
Tensor CopyRows(const Tensor &in, const size_t start, const size_t end);
/// Alias of CopyRows
Tensor SliceRows(const Tensor &in, const size_t start, const size_t end);
/// Slice the input tensor along the given axis to generate a new tensor
Tensor SliceOn(const Tensor &in, const size_t start, const size_t end,
int axis);
/// Return a tensor consisting of columns ([start, end)) from 'in'. It copies
/// the values from 'in'. 'in' is a 2D Tensor.
Tensor CopyColumns(const Tensor &in, const size_t start, const size_t end);
/// Alias of CopyColumns
Tensor SliceColumns(const Tensor &in, const size_t start, const size_t end);
/// Return a tensor which is vertically stacked from tensors in 'in'. Each
/// tensor in 'in' is a 2D tensor. Values are copied, no memory sharing.
Tensor ConcatenateRows(const vector<Tensor> &in);
/// Return the concatenation of the input tensors along the given axis.
Tensor ConcatOn(const std::vector<Tensor> &in, int axis);
/// Alias name for function ConcatenateRows
Tensor ConcatRows(const vector<Tensor> &in);
/// Return a tensor which is horizontally stacked from tensors in 'in'. Each
/// tensor in 'in' is a 2D tensor. Values are copied, no memory sharing.
Tensor ConcatenateColumns(const vector<Tensor> &in);
/// Alias name for function ConcatenateColumns
Tensor ConcatColumns(const vector<Tensor> &in);
} // namespace singa
#endif // SINGA_CORE_TENSOR_H_