This is the documentation for mshadow: A Lightweight CPU/GPU Matrix/Tensor Template Library in C++/CUDA.
./mkdoc.sh
to make the document locally. Expression is the key concept in mshadow; a common operation in mshadow is: tensor = some code to construct an expression
There are three major types of expression:
weight = - eta * (grad + lambda * weight)
is a Mapper expression. A = A * 2 + B
, making the lvalue appear in the expression, the results are still correct. dst = mirror(src)
is a chainer expression dot(lhs.T(), rhs)
, is a complex expression; we cannot write dst = 1.0 + dot(lhs.T(), rhs)
dst += 2.0f * dot(lhs.T(), rhs)
The basic binary operators are overloaded to composite Mapper expressions, so we can write
weight = (-eta) * (grad + lambda * weight);
We can also use customized binary operators, and unary operators:
struct maximum { MSHADOW_XINLINE static float Map(float a, float b) { return a > b ? a : b; } }; template<typename xpu> void ExampleMaximum(Tensor<xpu, 2> out, const Tensor<xpu, 2> &A, const Tensor<xpu, 2> &B) { out= 10.0f * F<maximum>(A+1.0f, B); } struct sigmoid { MSHADOW_XINLINE static float Map(float a) { return 1.0f/(1.0f+expf(-a)); } }; template<typename xpu> void ExampleSigmoid(Tensor<xpu, 2> out, const Tensor<xpu, 2> &in) { // equivalent to out = sigmoid(in*2) + 1; out = F<op::plus>(F<sigmoid>(in * 2.0f), ScalarExp(1.0f)); }
Matrix multiplications are supported by the following syntax, where things in brackets [] are optional
dst <sv> [scale*] dot(lhs [.T()] , rhs [.T()]), <sv> can be =,+=,-=
Example:
template<typename xpu> void Backprop(Tensor<xpu, 2> gradin, const Tensor<xpu, 2> &gradout, const Tensor<xpu, 2> &netweight) { gradin = 2.0 * dot(gradout, netweight.T()); }
Naming conventions:
Tensor<xpu, dim>
to refer to any Tensor with any device and dimension. xpu
, dim
, are implicit template parameters. Expr<xpu, dim>
will be used to refer to any mapper expression with type Tensor<xpu,dim>
. List of functions:
======
reshape(Expr<xpu,dim> src, Shape<dimdst> oshape)
Tensor<xpu, dimdst>
with shape=oshape
, is a Mapper expression. void ExampleReshape(void) { Tensor<cpu, 2> dst = NewTensor<cpu>(Shape2(4, 5)); Tensor<cpu, 1> src = NewTensor<cpu>(Shape1(20), 1.0f); dst = reshape(src, dst.shape_); ... }
======
broadcast<dimcast>(Tensor<xpu,1> src, Shape<dimdst> oshape)
Tensor<xpu, dimdst>
, shape = oshape
, is a Chainer expression. void ExampleBroadcast(void) { Tensor<cpu, 2> dst = NewTensor<cpu>(Shape2(2, 3)); Tensor<cpu, 1> src = NewTensor<cpu>(Shape1(2), 1.0f); src[0] = 2.0f; src[1] = 1.0f; dst = broadcast<0>(src, dst.shape_); // dst[0][0] = 2, dst[0][1] = 2; dst[1][0] = 1, dst[1][1] = 1 ... }
======
repmat(Tensor<xpu, 1> src, int nrows)
Tensor<xpu, 2>
, with shape=(nrows, src.size(0))
, is a Chainer expression. void ExampleRepmat(void) { Tensor<cpu, 2> dst = NewTensor<cpu>(Shape2(3, 2)); Tensor<cpu, 1> src = NewTensor<cpu>(Shape1(2), 1.0f); src[0] = 2.0f; src[1] = 1.0f; dst = repmat(src, 3); // dst[0][0] = 2, dst[0][1] = 1; dst[1][0] = 2, dst[1][1] = 1 ... }
======
sumall_except_dim<dimkeep>(Expr<xpu,dim> src)
Tensor<xpu, 1>
, with shape=(src.size(dimkeep))
, is a Complex expression. void ExampleSumAllExceptDim(void) { Tensor<cpu, 3> src = NewTensor<cpu>(Shape3(2, 3, 2), 1.0f); Tensor<cpu, 1> dst = NewTensor<cpu>(Shape1(3), 1.0f); dst += sumall_except_dim<1>(src * 2.0f); // dst[0] = 1.0 + 4.0 * 2.0 = 9.0 ... }
======
sum_rows(Expr<xpu, 2> src)
Tensor<xpu,1>
, with shape=(src.size(0))
, is a Complex expression. void ExampleSumRows(void) { Tensor<cpu, 2> src = NewTensor<cpu>(Shape2(3, 2), 1.0f); Tensor<cpu, 1> dst = NewTensor<cpu>(Shape1(2), 1.0f); dst += sum_rows(src + 1.0f); // dst[0] = 1.0 + 3.0 * (1.0 + 1.0) = 7.0 ... }
======
unpack_patch2col(Expr<xpu,3> img, int psize_y, int psize_x, int pstride)
output = dot(weight, mat)
to get convolved results, the relations: Tensor<xpu, 2>
, with shape=(in_channel*psize_x*psize_y, out_height*out_width)
, is a Chainer expression. void ExampleConvolution(Tensor<cpu, 3> dst, Tensor<cpu, 3> src, Tensor<cpu, 2> weight, int ksize, int stride) { int o_height = (src.size(1)- ksize) / stride + 1; int o_width = (src.size(2)- ksize) / stride + 1; utils::Assert(weight.size(1) == src.size(0) * ksize * ksize); TensorContainer<cpu, 2> tmp_col(Shape2(src.size(0) * ksize * ksize, o_height * o_width)); TensorContainer<cpu, 2> tmp_dst(Shape2(weight.size(0), o_height * o_width)); tmp_col = unpack_patch2col(src, ksize, ksize, stride); tmp_dst = dot(weight, tmp_col); dst = reshape(tmp_dst, dst.shape_); }
======
Tensor<xpu, 3>
, with shape = imshape
, is a Chainer expression. void ExampleDeconvolution(Tensor<cpu, 3> bottom, Tensor<cpu, 3> top, Tensor<cpu, 2> weight, int ksize, int stride) { int o_height = (bottom.size(1)- ksize) / stride + 1; int o_width = (bottom.size(2)- ksize) / stride + 1; utils::Assert(weight.size(1) == bottom.size(0) * ksize * ksize); TensorContainer<cpu, 2> tmp_col(Shape2(bottom.size(0) * ksize * ksize, o_height * o_width)); TensorContainer<cpu, 2> tmp_dst(Shape2(weight.size(0), o_height*o_width)); tmp_dst = reshape(top, tmp_dst.shape_); tmp_col = dot(weight.T(), tmp_dst); bottom = pack_col2patch(tmp_col, bottom.shape_, ksize, ksize, stride); }
======
pool<Reducer>(Expr<xpu, dim> img, [Shape<2> pshape,] int ksize_y, int ksize_x, int kstride)
Expr<xpu, dim>
, with shape = (in_channel, (in_height - ksize_y) / kstride + 1, (in_width - ksize_x) / kstride + 1)
, or the shape given by pshape. void ExampleMaxPooling(TensorContainer<cpu, 3> &data, int ksize, int stride) { TensorContainer<cpu, 3> pooled(Shape3(data.size(0), (data.size(1) - ksize) / stride + 1, (data.size(2) - ksize) / stride + 1)); pooled = pool<red::maximum>(data, ksize, ksize, stride); }
======
unpool<Reducer>(Tensor<xpu, 4> data_src, Tensor<xpu, 4> data_pooled, Tensor<xpu, 4> grad_pooled, int ksize_y, int ksize_x, int kstride)
void ExampleMaxUnpooling(Tensor<cpu, 4> &data_src, Tensor<cpu, 4> &data_pooled, Tensor<cpu, 4> &grad_pooled, int ksize, int kstride) { TensorContainer<cpu, 4> grad(data_src.shape_); grad = unpool<red::maximum>(data_src, data_pooled, grad_pooled, ksize, ksize, kstride); }
======
crop(Expr<xpu, dim> src, Shape<2> oshape, int start_height, int start_width)
crop(Expr<xpu, dim> src, Shape<2> oshape)
where the crop will happen in the center. void ExampleCrop(TensorContainer<cpu, 3> img, int start_height, int start_width) { TensorContainer<cpu, 3> cropped(Shape3(img.size(0), img.size(1) - start_height, img.size(2) - start_width)); cropped = crop(img, Shape2(img.size(1) - start_height, img.size(2) - start_width), start_height, start_width); }
======
mirror(Expr<xpu, dim> src)
void ExampleMirror(TensorContainer<cpu, 3> img) { TensorContainer<cpu, 3> mirrored(img.shape_); mirrored = mirror(img); }