| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| |
| from __future__ import division |
| |
| from collections import Counter, deque |
| import numpy as np |
| |
| from singa import tensor |
| from singa import utils |
| from .tensor import Tensor |
| from . import singa_wrap as singa |
| |
| CTensor = singa.Tensor |
| training = False |
| |
| |
def axis_helper(y_shape, x_shape):
    """
    Find the axes of the result along which x was broadcast.

    The two shapes are aligned on their trailing dimensions. An axis of
    y_shape is reported when x has no dimension at that position or when
    the aligned sizes differ.

    Args:
        y_shape: the shape of the result
        x_shape: the shape of x
    Return:
        a tuple of axis indices into y_shape, in ascending order
    """
    # number of leading axes of y that have no counterpart in x
    offset = len(y_shape) - len(x_shape)
    return tuple(
        axis for axis in range(len(y_shape))
        if axis < offset or x_shape[axis - offset] != y_shape[axis])
| |
| |
def back_broadcast(y_shape, x_shape, x):
    """
    Reduce a broadcast tensor from y_shape back to x_shape.

    Args:
        y_shape: the shape of the result
        x_shape: the shape of x
        x: the input (raw tensor carrying y_shape)
    Return:
        x itself when the two shapes compare equal; otherwise the raw data
        of x summed over the broadcast axes and reshaped to x_shape
    """
    if y_shape == x_shape:
        # nothing was broadcast, hand the input back untouched
        return x
    wrapped = tensor.from_raw_tensor(x)
    broadcast_axes = axis_helper(y_shape, x_shape)
    reduced = tensor.sum(wrapped, broadcast_axes)
    restored = tensor.reshape(reduced, x_shape)
    return restored.data
| |
| |
def infer_dependency(op):
    """
    Count, for every operation reachable from op through the `src` links,
    how many operations consume its outputs.
    Operator A depends on B if A uses the output(s) of B.

    Args:
        op: an Operator instance, e.g. the loss operation.

    Return:
        a Counter keyed by operation, whose value is the number of
        operations depending on it; and a Counter keyed by the id of an
        output tensor, whose value is the number of operations that take
        that tensor as input.
    """
    # The starting op itself is never inserted into the counters. If it is
    # not a terminal op, only the dependencies of its branch are counted.
    op_count = Counter()
    tensor_count = Counter()
    pending = deque([op])
    while pending:
        current = pending.pop()
        for src_op, xid, _, _ in current.src:
            if src_op not in op_count:
                # first visit: schedule the producer for traversal
                pending.append(src_op)
            op_count[src_op] += 1
            tensor_count[xid] += 1
    return op_count, tensor_count
| |
| |
def gradients(y, dy=None):
    """
    Collect the gradients produced by backward() into a dictionary.

    Args:
        y: the output tensor, e.g., the loss
        dy: gradient of the target w.r.t y; None indicates the gradient is
            1.0; it can be used to rescale the loss.

    Return:
        a dictionary mapping id(x) to the gradient tensor dx for every
        (x, dx) pair yielded by backward(), i.e. the tensors whose
        stores_grad is true (e.g. parameter tensors)
    """
    # TODO: this fn is only a helper for test cases for now.
    # 1. could implement __hash__ or
    # 2. make grad an attribute of the tensor class (p.grad = dp)
    return {id(param): grad for param, grad in backward(y, dy)}
| |
| |
def backward(y, dy=None):
    """
    Run the backward propagation starting at y.

    This is a generator: gradients are yielded one by one while the graph
    is walked from y.creator back through the `src` links of each
    operation.

    Args:
        y: a Tensor instance, usually the loss; it must hold a single value
        dy: a number or a Tensor instance, for the gradient of the
            objective/loss w.r.t y, usually None, i.e., 1.0
    Return:
        yield (tensor, gradient tensor) pairs for the tensors whose
        stores_grad is true (e.g. parameters).
    """
    assert isinstance(y, Tensor), "wrong input type."
    # op_dep: op -> number of consumer ops not yet back-propagated
    # tensor_dep: id(tensor) -> number of consumer ops not yet back-propagated
    op_dep, tensor_dep = infer_dependency(y.creator)
    assert y.size() == 1, ("y must be a Tensor with a single value;"
                           "size of y is % d" % y.size())

    # by default the dy is 1.0; a Tensor dy is unwrapped to its raw data and
    # anything else is converted to a Python float
    if dy is None:
        dy = float(1.0)
    elif isinstance(dy, Tensor):
        dy = dy.data
    else:
        dy = float(dy)

    # ready holds (operation, dy list) pairs; pop() takes from the right end,
    # so the most recently appended operation is processed first
    ready = deque([(y.creator, (dy,))])
    not_ready = {}  # mapping: op->[dy]

    if y.stores_grad:
        # gradients[y] = dy
        if isinstance(dy, float):
            # NOTE(review): g.device() is called on this numpy array below;
            # confirm that this branch works when dy is a float.
            g = np.array(dy)
        else:
            g = dy
        tg = Tensor(device=g.device(), data=g)
        yield (y, tg)

    while len(ready) > 0:
        op, dys = ready.pop()
        if not op.requires_grad or isinstance(op, Dummy):
            continue
        dxs = op._do_backward(*dys)
        # TODO src and dx must match

        assert len(op.src) == len(dxs), (
            "the number of src ops (=%d) and dx (=%d) not match" %
            (len(op.src), len(dxs)))
        # NOTE: the loop variable `y` below rebinds the function argument `y`.
        for (src_op, x_id, y, y_stores_grad), dx in zip(op.src, dxs):
            # prefix x is w.r.t op; prefix y is w.r.t src_op.
            # x_id is the python id of one input arg of op, denoted as x;
            # the same tensor is one of the outputs of src_op.
            # y_idx (below) is the index of x among the outputs of src_op.
            # not_ready[src_op][y_idx] records the intermediate gradient
            # of the y_idx'th output of src_op. 'intermediate gradient'
            # indicates that if this output is used in multiple children
            # operations, then we have to add the gradient (dx) from all these
            # children operations. When src_op is ready, it means that
            # the gradient of all its outputs are available, i.e. all children
            # operations have been backwarded.
            # y is None if y.stores_grad is false; otherwise it is a Tensor

            if isinstance(src_op, Dummy) and (not src_op.stores_grad):
                # placeholder input that does not want a gradient
                continue

            y_idx = src_op.y_id2idx[x_id]
            if src_op not in not_ready:
                # src_op may have multiple outputs
                not_ready[src_op] = [None for _ in src_op.y_id2idx]
                not_ready[src_op][y_idx] = dx
            else:
                dxs_ = not_ready[src_op]
                if dxs_[y_idx] is None:
                    dxs_[y_idx] = dx
                else:
                    # add the gradient from another children operation that
                    # uses y_idx'th output of src_op as input arg
                    dxs_[y_idx] += dx

            op_dep[src_op] -= 1
            tensor_dep[x_id] -= 1
            if y_stores_grad and tensor_dep[x_id] == 0:
                # store the gradient for final return, e.g. for parameters.
                # it may cause a delay to yield: the gradient is only output
                # after every consumer of this tensor has contributed.
                g = not_ready[src_op][y_idx]
                tg = Tensor(device=g.device(),
                            data=g,
                            name=src_op.grad_name(y_idx))
                yield (y, tg)

            if op_dep[src_op] == 0:
                # all consumers of src_op are done; schedule it if it needs
                # a backward pass, and drop its accumulated gradients
                if src_op.requires_grad is True:
                    assert not isinstance(
                        src_op, Dummy), "Dummy op does not do backward()"
                    ready.append((src_op, not_ready[src_op]))
                del not_ready[src_op]
        del op  # delete the operation to free all tensors from this op
| |
| |
class Operator(object):
    """
    Base class of an autograd operation, bundling the forward and backward
    function of a tensor calculation.

    Steps to add a specific operation Xxxx:
    1. create a subclass of Operator, name it as Xxxx
    2. override the forward() and backward(); The arguments of forward()
       and backward() should only include CTensor;
    """

    # number of operators created so far without an explicit name; used to
    # build unique default names
    op_count = 0

    def __init__(self, name=None):
        """
        Args:
            name (string): name of the operation; when None, a name of the
                form "<ClassName>#<counter>" is generated.
        """
        if name is not None:
            self.name = name
            return
        self.name = "{}#{}".format(self.__class__.__name__,
                                   Operator.op_count)
        Operator.op_count += 1

    def __call__(self, *xs):
        return self._do_forward(*xs)

    def output_name(self, idx):
        """
        Args:
            idx: index of the output among all outputs

        Return:
            the name of the output tensor, "<op name>:<idx>"
        """
        return "{}:{}".format(self.name, idx)

    def grad_name(self, idx):
        """
        Args:
            idx: index of the output among all outputs

        Return:
            the name of the gradient of the output tensor, i.e. the output
            name followed by "_g"
        """
        return "{}_g".format(self.output_name(idx))

    def _do_forward(self, *xs):
        """
        Do not call this function from user code. It is called by __call__().

        Records the producers of the inputs in self.src, runs forward() on
        the raw data of the inputs and wraps every result into a Tensor
        whose creator is this operation.

        Args:
            xs, Tensor instance(s)
        Returns:
            a tuple of Tensor instance(s)
        """
        # TODO add the pre hook
        assert all(isinstance(x, Tensor)
                   for x in xs), "xs should include only Tensor instances"

        # need to do backward if any of its input arg needs gradient
        self.requires_grad = any(x.requires_grad for x in xs)

        # One entry per input: (creator, id, tensor-or-None, stores_grad).
        # The tensor itself is only kept when its gradient has to be
        # returned by backward(), e.g. for parameters; intermediate tensors
        # will be released soon, so None is stored for them.
        self.src = [(x.creator, id(x), x if x.stores_grad else None,
                     x.stores_grad) for x in xs]

        # get the CTensor (data) of every input Tensor
        raw_inputs = tuple(x.data for x in xs)
        raw_outputs = self.forward(*raw_inputs)
        if not isinstance(raw_outputs, tuple):
            raw_outputs = (raw_outputs,)

        # create Tensor based on CTensor(data);
        # assume outputs are all CTensor instances
        outputs = tuple(
            Tensor(
                device=raw.device(),
                data=raw,
                requires_grad=self.requires_grad,
                creator=self,
                name=self.output_name(idx),
            ) for idx, raw in enumerate(raw_outputs))
        # map from python id to output index
        self.y_id2idx = {id(out): idx for idx, out in enumerate(outputs)}
        # TODO add the post hook
        return outputs

    def _do_backward(self, *dys):
        """
        Call backward() and make sure the result is a tuple.

        Any return value of backward() that is not a tuple is wrapped into
        a 1-tuple.
        """
        dxs = self.backward(*dys)
        return dxs if isinstance(dxs, tuple) else (dxs,)

    def forward(self, *xs):
        """Forward propagation.
        Args:
            xs: input args consisting of only CTensors.
        Returns:
            CTensor instance(s)
        """
        raise NotImplementedError

    def backward(self, *dys):
        """ Backward propagation.
        Args:
            dys: input args consisting of only CTensors.
        Returns:
            CTensor instance(s)
        """
        raise NotImplementedError

    def get_params(self):
        """Return the parameters of the operation; none for the base class."""
        return []
| |
| |
class Dummy(Operator):
    """Dummy operation which serves as a placeholder for autograd.

    It stands in as the creator of a tensor that is not the output of a
    real operation. It has no sources and never requires a backward pass.

    Args:
        tensor: the tensor this placeholder is attached to
        name(string): set it for debug
    """

    def __init__(self, tensor, name=None):
        super(Dummy, self).__init__(name)
        self.src = []
        # the wrapped tensor is the single "output" of this operation
        self.y_id2idx = {id(tensor): 0}
        self.tensor = tensor
        self.requires_grad = False

    def output_name(self, idx):
        """Return the operation name; idx is ignored."""
        return self.name

    def grad_name(self, idx):
        """Return the operation name followed by "_g"; idx is ignored."""
        return "{}_g".format(self.name)

    def __getattr__(self, name):
        """Forward lookups of unknown attributes to the wrapped tensor.

        Raises:
            AttributeError: if `tensor` itself has not been set yet.
        """
        # __getattr__ only runs when the normal lookup failed. If `tensor`
        # is the missing attribute (e.g. the instance was created without
        # running __init__, as copy/pickle do), reading self.tensor below
        # would re-enter this method without end.
        if name == "tensor":
            raise AttributeError(name)
        return self.tensor.__getattribute__(name)
| |
| |
class Mean(Operator):
    """
    Element-wise mean of each of the input CTensors.
    """

    def __init__(self):
        super(Mean, self).__init__()

    def forward(self, *l):
        """
        Args:
            l (a list of CTensor): a list of CTensor for element-wise mean.
        Returns:
            a new CTensor.
        """
        if training:
            # backward() needs the number of inputs
            self.l = len(l)
        assert (len(l) > 0)
        # accumulate into a zero tensor with the shape/device of the first
        # input
        x = singa.Tensor(list(l[0].shape()), l[0].device())
        x.SetFloatValue(0.0)
        for i in range(len(l)):
            x += l[i]
        return singa.MultFloat(x, 1 / len(l))

    def backward(self, dy):
        """
        Args:
            dy (CTensor): dL / dy.
        Returns:
            a tuple of dx (CTensor), one entry per input of forward().
        """
        # A tuple is required here: Operator._do_backward wraps any
        # non-tuple result into a 1-tuple, so a list would be treated as a
        # single gradient and fail the src/dx length check in backward().
        return (singa.MultFloat(dy, 1 / self.l),) * self.l
| |
| |
def mean(*l):
    """
    Element-wise mean of each of the input tensors.

    Args:
        l (a list of Tensor): the tensors to average.
    Returns:
        a new Tensor.
    """
    outputs = Mean()(*l)
    return outputs[0]
| |
| |
class ReLU(Operator):
    """
    Rectified linear unit, i.e. y = max(0, x), applied to the CTensor
    elementwise.
    """

    def __init__(self):
        super(ReLU, self).__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): input tensor.
        Returns:
            a new CTensor whose element y = x if x >= 0; otherwise 0.
        """
        if training:
            # keep the input around; backward() passes it to ReLUBackward
            self.input = x
        return singa.ReLU(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): dL / dy.
        Returns:
            dx (CTensor): dL / dx = dy if x >= 0; otherwise 0.
        """
        cached_input = self.input
        return singa.ReLUBackward(dy, cached_input)
| |
| |
def relu(x):
    """
    Rectified linear unit, i.e. y = max(0, x), applied to the tensor
    elementwise.

    Args:
        x (Tensor): input tensor.
    Returns:
        a new Tensor whose element y = x if x >= 0; otherwise 0.
    """
    outputs = ReLU()(x)
    return outputs[0]
| |
| |
class Less(Operator):
    """
    Returns the tensor resulted from performing the less logical operation
    elementwise on the input CTensors x and y.
    """

    def __init__(self):
        super(Less, self).__init__()

    def forward(self, x, y):
        """
        Return x<y, where x and y are CTensor.

        The comparison is computed as (x - y) < 0.
        """
        cur = singa.LTFloat(singa.__sub__(x, y), 0)
        if training:
            self.cache = cur
        return cur

    def backward(self, dy):
        """
        Args:
            dy (CTensor): data for the dL / dy, L is the loss.
        Raises:
            AssertionError: no backward function for this operator.
        """
        # raised explicitly (instead of `assert False`) so that the error
        # is still reported when Python runs with assertions disabled (-O)
        raise AssertionError('no backward function for less')
| |
| |
def less(x, y):
    """
    Return x<y elementwise, where x and y are Tensor.
    """
    outputs = Less()(x, y)
    return outputs[0]
| |
| |
class Clip(Operator):
    """
    Clip operator limits the given input within an interval. The interval
    is specified by the inputs 'min' and 'max'.
    """

    def __init__(self, min, max):
        """
        Args:
            min (float): min value, under which element is replaced by min.
                None disables the lower bound.
            max (float): max value, above which element is replaced by max.
                None disables the upper bound.
        """
        super(Clip, self).__init__()
        self.max = max
        self.min = min

    def forward(self, x):
        """
        Args:
            x (CTensor): input tensor
        Returns:
            a new CTensor with np.clip(x,min,max)
        """
        # self.mask starts as all ones and is multiplied by the "kept" mask
        # of every active bound; backward() multiplies dy by it
        self.mask = singa.Tensor(list(x.shape()), x.device())
        self.mask.SetFloatValue(1.0)

        if self.min is not None:
            self.min = float(self.min)
            below = singa.LTFloat(x, self.min)
            kept = singa.GEFloat(x, self.min)
            self.mask = singa.__mul__(kept, self.mask)
            # clipped elements take the bound, the others keep their value
            clipped_part = singa.MultFloat(below, self.min)
            kept_part = singa.__mul__(kept, x)
            x = singa.__add__(clipped_part, kept_part)

        if self.max is not None:
            self.max = float(self.max)
            above = singa.GTFloat(x, self.max)
            kept = singa.LEFloat(x, self.max)
            self.mask = singa.__mul__(kept, self.mask)
            clipped_part = singa.MultFloat(above, self.max)
            kept_part = singa.__mul__(kept, x)
            x = singa.__add__(clipped_part, kept_part)

        return x

    def backward(self, dy):
        """
        Args:
            dy (CTensor): dL / dy
        Returns:
            dx (CTensor): dL / dx, i.e. dy multiplied elementwise by the
                mask built in forward()
        """
        return singa.__mul__(dy, self.mask)
| |
| |
def clip(x, min=None, max=None):
    """
    Limit the given input within an interval specified by 'min' and 'max'.

    Args:
        x (Tensor): input tensor
        min (float): Minimum value, under which element is replaced by min.
        max (float): Maximum value, above which element is replaced by max.
    Returns:
        a new Tensor with np.clip(x,min,max).
    """
    op = Clip(min, max)
    return op(x)[0]
| |
| |
class Identity(Operator):
    """
    Identity operator: the output is the input.
    """

    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): input tensor.
        Returns:
            the very same CTensor object x (no copy is made).
        """
        return x

    def backward(self, dy):
        """
        Args:
            dy (CTensor): dL / dy.
        Returns:
            dx (CTensor): dL / dx, which is dy itself.
        """
        return dy
| |
| |
def identity(x):
    """
    Apply the identity operator.

    Args:
        x (Tensor): input tensor.
    Returns:
        a Tensor wrapping the same data as x.
    """
    outputs = Identity()(x)
    return outputs[0]
| |
| |
class Matmul(Operator):
    """
    Matrix multiplication operator.
    """

    def __init__(self):
        super(Matmul, self).__init__()

    def forward(self, x, w):
        """
        Return `np.matmul(x,w)`, where x and w are CTensor.
        """
        # todo, cannot do Mult for dims more than 2
        if training:
            # both operands are needed to compute the gradients
            self.input = (x, w)
        return singa.Mult(x, w)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): data for the dL / dy, L is the loss.
        Returns:
            a tuple for (dx, dw).
        """
        x, w = self.input[0], self.input[1]
        dx = singa.Mult(dy, singa.DefaultTranspose(w))
        dw = singa.Mult(singa.DefaultTranspose(x), dy)
        return (dx, dw)
| |
| |
def matmul(x, w):
    """
    Return `np.matmul(x,w)`, where x and w are Tensor.
    """
    outputs = Matmul()(x, w)
    return outputs[0]
| |
| |
class Greater(Operator):
    """
    Returns the tensor resulted from performing the greater logical
    operation elementwise on the input CTensors x and y.
    """

    def __init__(self):
        super(Greater, self).__init__()

    def forward(self, x, y):
        """
        Return x>y, where x and y are CTensor.

        The comparison is computed as (x - y) > 0.
        """
        cur = singa.GTFloat(singa.__sub__(x, y), 0)
        if training:
            self.cache = cur
        return cur

    def backward(self, dy):
        """
        Args:
            dy (CTensor): data for the dL / dy, L is the loss.
        Raises:
            AssertionError: no backward function for this operator.
        """
        # raised explicitly (instead of `assert False`) so that the error
        # is still reported when Python runs with assertions disabled (-O)
        raise AssertionError('no backward function for greater')
| |
| |
def greater(x, y):
    """
    Return x>y elementwise, where x and y are Tensor.
    """
    outputs = Greater()(x, y)
    return outputs[0]
| |
| |
class AddBias(Operator):
    """
    Add Bias to each row / column of the Tensor, depending on the axis arg.
    """

    def __init__(self, axis=0):
        """
        To indicate the calculation axis, 0 for row, 1 for column.
        Args:
            axis (int): 0 or 1, default is 0.
        """
        super(AddBias, self).__init__()
        self.axis = axis

    def forward(self, x, b):
        """
        Args:
            x (CTensor): matrix.
            b (CTensor): bias to be added.
        Return:
            the result CTensor. Note that this is the input object x; the
            return values of AddRow / AddColumn are not used, so the bias
            is presumably added to x in place.
        """
        if self.axis == 0:
            singa.AddRow(b, x)
        elif self.axis == 1:
            singa.AddColumn(b, x)
        return x

    def backward(self, dy):
        """
        Args:
            dy (CTensor): data for the dL / dy, L is the loss.
        Return:
            a tuple for (dx, db), matching the (x, b) order of forward();
            dx is data for dL / dx, db is data for dL / db.
        """
        dtype = dy.data_type()
        # the reduction is done in float32 and cast back afterwards
        _dy = dy.AsType(tensor.float32)
        if self.axis == 0:
            # bias added to every row: one bias entry per column, so the
            # gradient is reduced over the rows
            return dy, singa.Sum(_dy, 0).AsType(dtype)
        elif self.axis == 1:
            # bias added to every column: one bias entry per row, so the
            # gradient is reduced over the columns
            return dy, singa.Sum(_dy, 1).AsType(dtype)
| |
| |
def add_bias(x, b, axis=0):
    """
    Add Bias to each row / column of the Tensor, depending on the axis arg.

    Args:
        x (Tensor): matrix.
        b (Tensor): bias to be added.
        axis (int): 0 or 1, default is 0.
    Return:
        the result Tensor
    Raises:
        AssertionError: if x is not 2d, b is not 1d or axis is not 0 or 1.
    """
    # The shape is wrapped in a 1-tuple: formatting "%s" with a bare shape
    # tuple of more (or fewer) than one element raises TypeError instead of
    # producing the intended assertion message.
    assert x.ndim() == 2, "1st arg required 2d tensor. got shape: %s" % (
        x.shape,)
    assert b.ndim() == 1, "2nd arg required 1d tensor. got shape: %s" % (
        b.shape,)
    assert axis in [0, 1], "allowed axis: 0 or 1"
    return AddBias(axis)(x, b)[0]
| |
| |
class Reshape(Operator):
    """
    Reshape the input tensor similar to np.reshape.
    """

    def __init__(self, shape):
        """
        Args:
            shape (list of int): Specified shape for output. At most one
                dimension of the new shape can be -1. In this case, the
                value is inferred from the size of the tensor and the
                remaining dimensions. A dimension could also be 0,
                in which case the actual dimension value is unchanged
                (i.e. taken from the input tensor).
        """
        super(Reshape, self).__init__()
        self.shape = shape

    def forward(self, x):
        """
        Args:
            x (CTensor): matrix.
        Return:
            the result CTensor
        """
        # remember the input shape; backward() reshapes dy back to it
        self._shape = x.shape()
        in_rank = len(self._shape)

        # a 0 entry copies the input dimension at the same position
        resolved = []
        for i, dim in enumerate(list(self.shape)):
            if i < in_rank and dim == 0:
                resolved.append(self._shape[i])
            else:
                resolved.append(dim)

        # a -1 entry takes whatever is left of the total element count;
        # np.abs removes the sign the -1 puts on the product
        inferred = int(np.prod(self._shape) // np.abs(np.prod(resolved)))
        self.cache = [inferred if s == -1 else int(s) for s in resolved]
        return singa.Reshape(x, self.cache)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): dL / dy
        Returns:
            dx (CTensor): dL / dx, i.e. dy reshaped to the input shape
        """
        return singa.Reshape(dy, self._shape)
| |
| |
def reshape(x, shape):
    """
    Reshape the input tensor similar to np.reshape.

    Args:
        x (Tensor): matrix.
        shape (list of int): Specified shape for output. At most one
            dimension of the new shape can be -1. In this case, the
            value is inferred from the size of the tensor and the
            remaining dimensions. A dimension could also be 0,
            in which case the actual dimension value is unchanged
            (i.e. taken from the input tensor).
    Return:
        the result Tensor
    """
    op = Reshape(shape)
    return op(x)[0]
| |
| |
class PRelu(Operator):
    """
    PRelu applies the function `f(x) = slope * x` for x < 0,
    `f(x) = x` for x >= 0 to the data tensor elementwise.
    """

    def __init__(self):
        super(PRelu, self).__init__()

    def forward(self, x, slope):
        """
        Args:
            x (CTensor): matrix.
            slope (CTensor): slope applied to the negative elements of x.
        Return:
            the result CTensor
        """
        neg_mask = singa.LTFloat(x, 0.0)
        # negative part: x * (x < 0) * slope
        out = singa.__mul__(x, neg_mask)
        out = singa.__mul__(out, slope)
        # positive part
        out += singa.ReLU(x)
        if training:
            self.input = x
            self.slope = slope
            self.mask0 = neg_mask
            # shapes of x, slope and the result, used to undo broadcasting
            self.shape0 = list(x.shape())
            self.shape1 = list(slope.shape())
            self.shape3 = list(out.shape())
        return out

    def backward(self, dy):
        """
        Args:
            dy (CTensor): dL / dy
        Returns:
            a tuple (dx, dslope): dL / dx and dL / dslope
        """
        pos_mask = singa.GEFloat(self.input, 0.0)
        neg_slope = singa.__mul__(self.mask0, self.slope)
        local_grad = singa.__add__(pos_mask, neg_slope)
        dx = singa.__mul__(dy, local_grad)
        dslope = singa.__mul__(dy, singa.__mul__(self.mask0, self.input))
        if (type(dy) is float) or self.shape0 == self.shape1:
            assert self.shape0 == self.shape1, ('should have same shape')
            return dx, dslope
        # handle broadcast: reduce each gradient to the shape of its input
        dx = back_broadcast(self.shape3, self.shape0, dx)
        dslope = back_broadcast(self.shape3, self.shape1, dslope)
        return dx, dslope
| |
| |
def prelu(x, slope):
    """
    PRelu applies the function `f(x) = slope * x` for x < 0,
    `f(x) = x` for x >= 0 to the data tensor elementwise.

    Args:
        x (Tensor): matrix.
        slope (Tensor): slope applied to the negative elements of x.
    Return:
        the result Tensor
    """
    outputs = PRelu()(x, slope)
    return outputs[0]
| |
| |
class Add(Operator):
    """
    Performs element-wise binary addition.
    """

    def __init__(self):
        super(Add, self).__init__()

    def forward(self, a, b):
        """
        Return `a+b`, where a and b are CTensor.
        """
        total = singa.__add__(a, b)
        if training:
            # shapes of a, b and the result, used to undo broadcasting
            self.shape0 = list(a.shape())
            self.shape1 = list(b.shape())
            self.shape3 = list(total.shape())
        return total

    def backward(self, dy):
        """
        Args:
            dy(CTensor): dL / dy
        Return:
            a tuple for (dx0, dx1), dx0 is data for dL / da, dx1 is data
            for dL / db.
        """
        if (type(dy) is float) or self.shape0 == self.shape1:
            assert self.shape0 == self.shape1, ('should have same shape')
            # no broadcasting happened: both inputs receive dy as it is
            return dy, dy
        # handle broadcast: reduce dy to the shape of each input
        grad_a = back_broadcast(self.shape3, self.shape0, dy)
        grad_b = back_broadcast(self.shape3, self.shape1, dy)
        return grad_a, grad_b
| |
| |
def add(a, b):
    """
    Return `a+b`, where a and b are Tensor.
    """
    outputs = Add()(a, b)
    return outputs[0]
| |
| |
class Elu(Operator):
    """
    `f(x) = alpha * (exp(x) - 1.)` for x < 0, `f(x) = x` for x >= 0., is
    applied to the tensor elementwise.
    """

    def __init__(self, alpha=1.):
        """
        Args:
            alpha (float): Coefficient of ELU, default is 1.0
        """
        super(Elu, self).__init__()
        self.alpha = alpha

    def forward(self, x):
        """
        Args:
            x (CTensor): matrix
        Returns:
            a CTensor for the result
        """
        if training:
            self.input = x
        # negative branch: alpha * (exp(x * (x < 0)) - 1); elements with
        # x >= 0 become exp(0) - 1 = 0 and so contribute nothing
        neg_part = singa.LTFloat(x, 0.0)
        neg_part *= x
        neg_part = singa.MultFloat(singa.SubFloat(singa.Exp(neg_part), 1.0),
                                   self.alpha)
        # positive branch
        pos_part = singa.ReLU(x)
        neg_part += pos_part
        return neg_part

    def backward(self, dy):
        """
        Args:
            dy (CTensor): dL / dy
        Returns:
            dx (CTensor): dL / dx
        """
        # alpha * exp(x) where x < 0
        neg_mask = singa.LTFloat(self.input, 0.0)
        dx = singa.MultFloat(singa.Exp(self.input), self.alpha)
        dx *= neg_mask

        # 1 where x >= 0
        pos_mask = singa.GEFloat(self.input, 0.0)

        dx += pos_mask
        dx *= dy
        return dx
| |
| |
def elu(x, alpha=1):
    """
    `f(x) = alpha * (exp(x) - 1.)` for x < 0, `f(x) = x` for x >= 0., is
    applied to the tensor elementwise.

    Args:
        x (Tensor): matrix
        alpha (float): Coefficient of ELU, default is 1
    Returns:
        a Tensor for the result
    """
    op = Elu(alpha)
    return op(x)[0]
| |
| |
class Equal(Operator):
    """
    Returns the tensor resulted from performing the equal logical operation
    elementwise on the input CTensors x and y.
    """

    def __init__(self):
        super(Equal, self).__init__()

    def forward(self, x, y):
        """
        Return `x==y`, where x and y are CTensor.
        """
        return singa.__eq__(x, y)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): data for the dL / dy, L is the loss
        Raises:
            AssertionError: no backward function for this operator
        """
        # raised explicitly (instead of `assert False`) so that the error
        # is still reported when Python runs with assertions disabled (-O)
        raise AssertionError('no backward function for equal')
| |
| |
def equal(x, y):
    """
    Return `x==y` elementwise, where x and y are Tensor.
    """
    outputs = Equal()(x, y)
    return outputs[0]
| |
| |
class SeLU(Operator):
    """
    `y = gamma * (alpha * e^x - alpha)` for x <= 0, `y = gamma * x` for x > 0
    is applied to the tensor elementwise.
    """

    def __init__(self, alpha=1.67326, gamma=1.0507):
        """
        Args:
            alpha (float): Coefficient of SELU default to 1.67326
            gamma (float): Coefficient of SELU default to 1.0507
        """
        super(SeLU, self).__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, x):
        """
        Args:
            x (CTensor): matrix
        Returns:
            a CTensor for the result
        """
        if training:
            self.input = x
        # non-positive branch: alpha * gamma * (exp(x * (x <= 0)) - 1);
        # elements with x > 0 become exp(0) - 1 = 0
        neg_part = singa.LEFloat(x, 0.0)
        neg_part *= x
        neg_part = singa.MultFloat(singa.SubFloat(singa.Exp(neg_part), 1.0),
                                   self.alpha * self.gamma)
        # positive branch: gamma * relu(x)
        pos_part = singa.ReLU(x)
        pos_part = singa.MultFloat(pos_part, self.gamma)
        neg_part += pos_part
        return neg_part

    def backward(self, dy):
        """
        Args:
            dy (CTensor): dL / dy
        Returns:
            dx (CTensor): dL / dx
        """
        # gamma * alpha * exp(x) where x <= 0
        nonpos_mask = singa.LEFloat(self.input, 0.0)
        exp_grad = singa.MultFloat(singa.Exp(self.input),
                                   self.gamma * self.alpha)
        exp_grad = singa.__mul__(nonpos_mask, exp_grad)

        # gamma where x > 0
        pos_mask = singa.GTFloat(self.input, 0.0)
        lin_grad = singa.MultFloat(pos_mask, self.gamma)

        dx = singa.__add__(exp_grad, lin_grad)
        dx *= dy
        return dx
| |
| |
def selu(x, alpha=1.67326, gamma=1.0507):
    """
    `y = gamma * (alpha * e^x - alpha)` for x <= 0, `y = gamma * x` for x > 0
    is applied to the tensor elementwise.

    Args:
        x (Tensor): matrix
        alpha (float): Coefficient of SELU default to 1.67326
        gamma (float): Coefficient of SELU default to 1.0507
    Returns:
        a Tensor for the result
    """
    op = SeLU(alpha, gamma)
    return op(x)[0]
| |
| |
class SoftMax(Operator):
    """
    Apply SoftMax for each row of the Tensor or each column of the Tensor
    according to the parameter axis.
    """

    def __init__(self, axis=1):
        """
        Args:
            axis (int): axis of softmax, default to 1
        """
        super(SoftMax, self).__init__()
        self.axis = axis

    def forward(self, x):
        """
        Args:
            x (CTensor): the input 1d or 2d tensor
        Returns:
            the result CTensor
        """
        # the output is always cached (not only in training mode);
        # backward() passes it to SoftMaxBackward
        result = singa.SoftMax(x, self.axis)
        self.output = result
        return result

    def backward(self, dy):
        """
        Args:
            dy (CTensor): dL / dy
        Returns:
            dx (CTensor): dL / dx
        """
        return singa.SoftMaxBackward(dy, self.axis, self.output)
| |
| |
def softmax(x, axis=1):
    """
    Apply SoftMax for each row of the Tensor or each column of the Tensor
    according to the parameter axis.

    Args:
        x (Tensor): the input 1d or 2d tensor
        axis (int): axis of softmax, default to 1
    Returns:
        the result Tensor
    """
    op = SoftMax(axis)
    return op(x)[0]
| |
| |
class Sum(Operator):
    """
    Element-wise sum of each of the input tensors
    """

    def __init__(self):
        super(Sum, self).__init__()

    def forward(self, *l):
        """
        Args:
            l (a list of CTensor): element-wise sum operator
        Returns:
            a CTensor for the result
        """
        if training:
            # backward() needs the number of inputs
            self.l = len(l)
        assert (len(l) > 0)
        # accumulate into a zero tensor with the shape/device of the first
        # input
        x = singa.Tensor(list(l[0].shape()), l[0].device())
        x.SetFloatValue(0.0)
        for i in range(len(l)):
            x += l[i]
        return x

    def backward(self, dy):
        """
        Args:
            dy (CTensor): dL / dy
        Returns:
            a tuple of dx (CTensor), one entry per input of forward();
            every entry is dy itself.
        """
        # A tuple is required here: Operator._do_backward wraps any
        # non-tuple result into a 1-tuple, so a list would be treated as a
        # single gradient and fail the src/dx length check in backward().
        return (dy,) * self.l
| |
| |
def sum(*l):
    """
    Element-wise sum of each of the input tensors.

    Args:
        l (a list of Tensor): the tensors to add up
    Returns:
        a Tensor for the result
    """
    outputs = Sum()(*l)
    return outputs[0]
| |
| |
class BinaryCrossEntropy(Operator):
    """
    Calculate negative log likelihood loss for a batch of training data.

    The target is given at construction time; its raw data is kept in
    self.t.
    """

    def __init__(self, t):
        """
        Args:
            t (Tensor): 1d or 2d tensor, the target data for training.
        """
        super(BinaryCrossEntropy, self).__init__()
        self.t = t.data

    def forward(self, x):
        """
        Args:
            x (CTensor): 1d or 2d tensor, the prediction data(output)
                of current network.
        Returns:
            loss (CTensor): scalar.
        """
        # t * log(x + 0.0001)
        shifted_x = singa.AddFloat(x, 0.0001)
        loss = singa.SumAll(singa.__mul__(self.t, singa.Log(shifted_x)))
        # (1 - t) * log(1.0001 - x)
        one_minus_t = singa.AddFloat(singa.MultFloat(self.t, -1.0), 1.0)
        one_minus_x = singa.AddFloat(singa.MultFloat(x, -1.0), 1.0001)
        neg_loss = singa.SumAll(
            singa.__mul__(one_minus_t, singa.Log(one_minus_x)))
        loss += neg_loss
        # negate and average over the first dimension of x
        loss /= -x.shape()[0]
        # backward() works on the shifted prediction x + 0.0001
        self.x = singa.AddFloat(x, 0.0001)
        return loss

    def backward(self, dy=1.0):
        """
        Args:
            dy (float or CTensor): scalar, accumulate gradient from outside
                of current network, usually equal to 1.0
        Returns:
            dx (CTensor): data for the dL /dx, L is the loss, x is the output
                of current network. note that this is true for
                dy = 1.0. Nothing (None) is returned when dy is not a float.
        """
        dx = singa.__div__(self.t, self.x)
        t_minus_one = singa.AddFloat(self.t, -1.0)
        # NOTE(review): self.x is x + 0.0001, so this is x - 0.9998, while
        # forward() takes log(1.0001 - x) -- confirm the intended constant.
        shifted = singa.AddFloat(self.x, -0.9999)
        dx -= singa.__div__(t_minus_one, shifted)
        dx *= float(-1.0 / self.x.shape()[0])
        if isinstance(dy, float):
            # dtype of dy: float
            dx *= dy
            return dx
        if isinstance(dy, CTensor):
            pass  # TODO, broadcast elementwise multiply seems not support
        return None
| |
| |
def binary_cross_entropy(x, t):
    """
    Apply the BinaryCrossEntropy operator.

    Args:
        x (Tensor): the prediction.
        t (Tensor): the target.
    Returns:
        the loss Tensor.
    """
    op = BinaryCrossEntropy(t)
    return op(x)[0]
| |
| |
class CrossEntropy(Operator):
    """
    Calculate negative log likelihood loss for a batch of training data.

    The target is given at construction time; its raw data is kept in
    self.t.
    """

    def __init__(self, t):
        """
        Args:
            t (Tensor): 1d or 2d tensor, the target data for training.
        """
        super(CrossEntropy, self).__init__()
        self.t = t.data

    def forward(self, x):
        """
        Args:
            x (CTensor): 1d or 2d tensor, the prediction data(output)
                of current network.
        Returns:
            loss (CTensor): scalar.
        """
        weighted_log = singa.__mul__(self.t, singa.Log(x))
        loss = singa.SumAll(weighted_log)
        # negate and average over the first dimension of x
        loss /= -x.shape()[0]
        self.x = x
        return loss

    def backward(self, dy=1.0):
        """
        Args:
            dy (float or CTensor): scalar, accumulate gradient from outside
                of current network, usually equal to 1.0
        Returns:
            dx (CTensor): data for the dL /dx, L is the loss, x is the output
                of current network. note that this is true for
                dy = 1.0. Nothing (None) is returned when dy is not a float.
        """
        dx = singa.__div__(self.t, self.x)
        dx *= float(-1.0 / self.x.shape()[0])
        if isinstance(dy, float):
            # dtype of dy: float
            dx *= dy
            return dx
        if isinstance(dy, CTensor):
            pass  # TODO, broadcast elementwise multiply seems not support
        return None
| |
| |
def cross_entropy(x, t):
    """
    Apply a CrossEntropy operator built for target `t` to `x`.
    Args:
        x (Tensor): the prediction, must be 2d. CrossEntropy.forward takes
            its logarithm directly.
        t (Tensor): the ground truth, at most 2d.
    Returns:
        the first output of the operator call
    """
    assert x.ndim() == 2, (
        "1st arg required 2d tensor. got shape: " + str(x.shape))
    assert t.ndim() <= 2, (
        "2nd arg required <=2d tensor. got shape: " + str(t.shape))
    loss_op = CrossEntropy(t)
    return loss_op(x)[0]
| |
| |
class RankingLoss(Operator):
    """
    Margin based ranking loss, `L = max{0, M - pos + neg}`, summed over all
    elements and divided by the size of the first dimension.
    """

    def __init__(self, M=0.2):
        """
        Args:
            M (float): the margin
        """
        super().__init__()
        self.M = M

    def forward(self, pos, neg):
        """
        Args:
            pos (CTensor): scores of the positive samples
            neg (CTensor): scores of the negative samples
        Returns:
            loss (CTensor): the averaged hinge value
        """
        zeros = singa.Tensor(list(pos.shape()), pos.device())
        zeros.SetFloatValue(0.0)
        # M - pos + neg
        margin_gap = singa.AddFloat(singa.__sub__(neg, pos), self.M)
        # mask of the elements that violate the margin
        active = singa.__gt__(margin_gap, zeros)
        if training:
            self.inputs = (active,)  # (BS,)
        loss = singa.SumAll(singa.__mul__(active, margin_gap))
        loss /= (pos.shape()[0])
        return loss

    def backward(self, dy=1.0):
        """
        Args:
            dy: not used by this implementation
        Returns:
            a tuple (dpos, dneg): the mask saved by forward scaled by
            -1/Size and +1/Size respectively.
        """
        assert training, "enable training mode to do backward"
        active = self.inputs[0]
        dims = list(active.shape())
        device = active.device()
        count = active.Size()

        # dpos = -1 if M-pos+neg > 0 else 0 (divided by the element count)
        neg_scale = singa.Tensor(dims, device)
        neg_scale.SetFloatValue(-1.0 / count)
        # dneg = 1 if M-pos+neg > 0 else 0 (divided by the element count)
        pos_scale = singa.Tensor(dims, device)
        pos_scale.SetFloatValue(1.0 / count)

        dpos = singa.__mul__(active, neg_scale)
        dneg = singa.__mul__(active, pos_scale)
        return dpos, dneg
| |
| |
def ranking_loss(pos, neg, M=0.2):
    """
    Apply a RankingLoss operator with margin `M` to `pos` and `neg`.
    Args:
        pos (Tensor): scores of the positive samples
        neg (Tensor): scores of the negative samples, same shape as pos
        M (float): the margin
    Returns:
        the first output of the operator call
    """
    assert pos.shape == neg.shape, (
        "input and target shape different: %s, %s" % (pos.shape, neg.shape))
    loss_op = RankingLoss(M)
    return loss_op(pos, neg)[0]
| |
| |
class SoftMaxCrossEntropy(Operator):
    """
    Softmax followed by cross entropy, summed and divided by the size of
    the first dimension of the input.
    """

    def __init__(self, t):
        """
        Args:
            t: the target; its `.data` attribute is kept for forward and
                backward.
        """
        super().__init__()
        self.t = t.data

    def forward(self, x):
        """
        Args:
            x (CTensor): the input passed to singa.SoftMax
        Returns:
            loss (CTensor): the averaged loss
        """
        # the softmax output is kept because backward needs it
        self.p = singa.SoftMax(x)
        loss = singa.SumAll(singa.CrossEntropyFwd(self.p, self.t))
        loss /= x.shape()[0]
        return loss

    def backward(self, dy=1.0):
        """
        Args:
            dy: not used by this implementation
        Returns:
            dx (CTensor): result of singa.SoftmaxCrossEntropyBwd divided by
            the size of the first dimension.
        """
        grad = singa.SoftmaxCrossEntropyBwd(self.p, self.t)
        grad /= float(self.p.shape()[0])
        return grad
| |
| |
def softmax_cross_entropy(x, t):
    """
    Apply a SoftMaxCrossEntropy operator built for target `t` to `x`.
    Args:
        x (Tensor): the logits, must be 2d
        t (Tensor): the ground truth, at most 2d
    Returns:
        the first output of the operator call
    """
    assert x.ndim() == 2, (
        "1st arg required 2d tensor. got shape: " + str(x.shape))
    assert t.ndim() <= 2, (
        "2nd arg required <=2d tensor. got shape: " + str(t.shape))
    loss_op = SoftMaxCrossEntropy(t)
    return loss_op(x)[0]
| |
| |
class MeanSquareError(Operator):
    """
    Mean of the squared element-wise difference between input and target.
    """

    def __init__(self, t):
        """
        Args:
            t: the target; its `.data` attribute is kept for forward.
        """
        super(MeanSquareError, self).__init__()
        self.t = t.data

    def forward(self, x):
        """
        Args:
            x (CTensor): the prediction
        Returns:
            loss (CTensor): sum of squared errors divided by the number of
            elements of x.
        """
        # the raw error is kept because backward needs it
        self.err = singa.__sub__(x, self.t)
        loss = singa.SumAll(singa.Square(self.err))
        # number of elements of x
        self.n = 1
        for s in x.shape():
            self.n *= s
        loss /= self.n
        return loss

    def backward(self, dy=1.0):
        """
        Args:
            dy: gradient from outside, multiplied into the result
        Returns:
            dx (CTensor): (2 / n) * err * dy
        """
        # Scale into a fresh tensor: scaling self.err through `*=` would
        # modify the cached error (if `*=` works in place on CTensor) and
        # make a repeated backward call return a re-scaled gradient.
        dx = singa.MultFloat(self.err, float(2 / self.n))
        dx *= dy
        return dx
| |
| |
def mse_loss(x, t):
    """
    Apply a MeanSquareError operator built for target `t` to `x`.
    Args:
        x (Tensor): the prediction
        t (Tensor): the target, same shape as x
    Returns:
        the first output of the operator call
    """
    assert x.shape == t.shape, (
        "input and target shape different: %s, %s" % (x.shape, t.shape))
    loss_op = MeanSquareError(t)
    return loss_op(x)[0]
| |
| |
def ctensor2numpy(x):
    """
    To be used in SoftMax Operator.
    Read all values of a singa tensor through `GetFloatValue` and reshape
    the result to the tensor's shape.
    """
    element_count = int(x.Size())
    flat_values = x.GetFloatValue(element_count)
    return flat_values.reshape(x.shape())
| |
| |
class Flatten(Operator):
    """
    Flattens the input tensor into a 2D matrix. If input tensor has shape
    `(d_0, d_1, ... d_n)` then the output will have shape `(d_0 X d_1 ...
    d_(axis-1), d_axis X d_(axis+1) ... X dn)`.
    """

    def __init__(self, axis=1):
        """
        Args:
            axis (int): Indicate up to which input dimensions (exclusive)
                should be flattened to the outer dimension of the output. The
                value for axis must be in the range [-r, r], where r is the
                rank of the input tensor. Negative value means counting
                dimensions from the back. When axis = 0, the shape of the
                output tensor is `(1, (d_0 X d_1 ... d_n)`, where the shape
                of the input tensor is `(d_0, d_1, ... d_n)`.
        """
        super(Flatten, self).__init__()
        self.axis = axis

    def forward(self, x):
        """
        Args:
            x (CTensor): the input tensor
        Returns:
            the result CTensor
        """
        # the input shape is kept so backward can restore it
        self.shape = list(x.shape())
        shape, axis = self.shape, self.axis
        rank = len(shape)
        # the axis must be within the documented range [-r, r]
        assert -rank <= axis <= rank, (
            "the axis must be within [-%d, %d]" % (rank, rank))
        # An empty slice has product 1, so axis == 0 and axis == -r give
        # (1, d_0 X ... X d_n) and axis == r gives (d_0 X ... X d_n, 1).
        new_shape = (int(np.prod(shape[:axis])), int(np.prod(shape[axis:])))
        y = singa.Reshape(x, new_shape)
        return y

    def backward(self, dy):
        """
        Args:
            dy (CTensor): data for the dL / dy, L is the loss
        Returns:
            dx (CTensor): data for the dL / dx, L is the loss,
        """
        dx = singa.Reshape(dy, self.shape)
        return dx
| |
| |
def flatten(x, axis=1):
    """
    Apply a Flatten operator to `x`, producing a 2D matrix. If input tensor
    has shape `(d_0, d_1, ... d_n)` then the output will have shape
    `(d_0 X d_1 ... d_(axis-1), d_axis X d_(axis+1) ... X dn)`.
    Args:
        x (Tensor): the input tensor
        axis (int): Indicate up to which input dimensions (exclusive)
            should be flattened to the outer dimension of the output. The
            value for axis must be in the range [-r, r], where r is the
            rank of the input tensor. Negative value means counting
            dimensions from the back. When axis = 0, the shape of the
            output tensor is `(1, (d_0 X d_1 ... d_n)`, where the shape
            of the input tensor is `(d_0, d_1, ... d_n)`.
    Returns:
        the result Tensor
    """
    flatten_op = Flatten(axis)
    outputs = flatten_op(x)
    return outputs[0]
| |
| |
class ScatterElements(Operator):
    """
    ScatterElements operator following ONNX Operator Schemas
    https://github.com/onnx/onnx/blob/master/docs/Changelog.md#ScatterElements-11

    Example usage:
    data = [
        [0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0],
    ]
    axis = 0
    indices = [
        [1, 0, 2],
        [0, 2, 1],
    ]
    updates = [
        [1.0, 1.1, 1.2],
        [2.0, 2.1, 2.2],
    ]
    output = [
        [2.0, 1.1, 0.0]
        [1.0, 0.0, 2.2]
        [0.0, 2.1, 1.2]
    ]

    """

    def __init__(self, indices, updates, axis=0):
        """
        Args:
            indices (Tensor): index tensor
            updates (Tensor): source tensor
            axis (int): Which axis to scatter on. A negative value means
                counting dimension from the back. Accepted range is [-r,r-1]
                where r=rank(destination_tensor)
        """
        super(ScatterElements, self).__init__()
        self.indices = indices
        self.updates = updates
        self.axis = axis

    def forward(self, x):
        """
        Args:
            x (CTensor): the 2D destination tensor
        Returns:
            a CTensor holding a copy of x with `updates` written at the
            positions given by `indices` along `axis`.
        """
        x_shape = x.shape()
        x_rank = len(x_shape)
        # indices and updates are converted to numpy arrays once and kept
        # on the operator, because backward reads them again
        if isinstance(self.indices, Tensor):
            self.indices = tensor.to_numpy(self.indices)
        elif isinstance(self.indices, (list, tuple)):
            self.indices = np.array(self.indices)
        if isinstance(self.updates, Tensor):
            self.updates = tensor.to_numpy(self.updates)
        elif isinstance(self.updates, (list, tuple)):
            self.updates = np.array(self.updates)
        _x = tensor.to_numpy(tensor.from_raw_tensor(x))
        _x = _x.astype(np.float32)

        assert x_rank == 2, "Only support 2D input."
        assert x_rank == len(
            self.indices.shape
        ), "Index should have the same number of dimensions as output"
        # accepted axis range is [-r, r-1]
        assert -x_rank <= self.axis < x_rank, "Axis is out of range"
        # accepted index range is [-dim, dim-1]; dim itself is out of bounds
        dim = _x.shape[self.axis]
        assert np.logical_and(
            -dim <= self.indices, self.indices < dim).all(
            ), "The values of the indexes should be between %d and %d" % (
                -dim, dim - 1)

        self.axis = self.axis % x_rank
        u_shape = self.updates.shape
        y = _x.copy()
        # written one element at a time so that, for duplicated indices,
        # the last update wins
        for i in range(u_shape[0]):
            for j in range(u_shape[1]):
                idx = int(self.indices[i][j])
                if self.axis == 0:
                    y[idx][j] = self.updates[i][j]
                else:
                    y[i][idx] = self.updates[i][j]
        y = tensor.from_numpy(y)
        y.to_device(x.device())
        return y.data

    def backward(self, dy):
        """
        Args:
            dy (CTensor): data for the dL / dy, L is the loss
        Returns:
            dx (CTensor): dy with the positions overwritten in forward
            set to zero.
        """
        mask = np.ones(dy.shape(), dtype=np.float32)
        u_shape = self.updates.shape
        for i in range(u_shape[0]):
            for j in range(u_shape[1]):
                idx = int(self.indices[i][j])
                if self.axis == 0:
                    mask[idx][j] = 0.
                else:
                    mask[i][idx] = 0.
        mask = tensor.from_numpy(mask)
        mask.to_device(dy.device())
        return singa.__mul__(dy, mask.data)
| |
| |
def scatter_elements(x, indices, updates, axis=0):
    """
    Apply a ScatterElements operator to `x`.
    Args:
        x (Tensor): input tensor.
        indices (Tensor): index tensor
        updates (Tensor): source tensor
        axis (int): Which axis to scatter on. A negative value means
            counting dimension from the back. Accepted range is [-r,r-1]
            where r=rank(destination_tensor)
    Returns:
        the output Tensor.
    """
    scatter_op = ScatterElements(indices, updates, axis)
    outputs = scatter_op(x)
    return outputs[0]
| |
| |
class Concat(Operator):
    """
    Concatenate a list of tensors into a single tensor. All input tensors must
    have the same shape, except for the dimension size of the axis to
    concatenate on.
    """

    def __init__(self, axis=0):
        """
        Args:
            axis (int): Which axis to concat on. A negative value means
                counting dimensions from the back. Accepted range is [-r, r-1]
                where r = rank(inputs).
        """
        super().__init__()
        self.axis = axis

    def forward(self, *xs):
        """
        Args:
            xs (a list of CTensor): List of tensors for concatenation
        Returns:
            a CTensor for the result
        """
        # turn a negative axis into its non-negative equivalent
        if self.axis < 0:
            self.axis = self.axis % len(xs[0].shape())
        if training:
            # cumulative end offset of every input along the concat axis,
            # used by backward to cut dy back into pieces
            boundaries = []
            running = 0
            for piece in xs:
                running += piece.shape()[self.axis]
                boundaries.append(running)
            self.slice_point = boundaries
        return singa.ConcatOn(singa.VecTensor(list(xs)), self.axis)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): data for the dL / dy, L is the loss
        Returns:
            dxs (a tuple of CTensor): data for the dL / dxs, L is the loss,
        """
        assert hasattr(
            self, "slice_point"), "Please set training as True before do BP. "
        assert self.slice_point[-1] == dy.shape()[self.axis], "Shape mismatch."
        # every slice starts where the previous one ended
        starts = [0] + self.slice_point[:-1]
        return tuple(
            singa.SliceOn(dy, begin, end, self.axis)
            for begin, end in zip(starts, self.slice_point))
| |
| |
def cat(xs, axis=0):
    """
    Apply a Concat operator to the tensors in `xs`. All input tensors must
    have the same shape, except for the dimension size of the axis to
    concatenate on.
    Args:
        xs (a list of Tensor): List of tensors for concatenation
        axis (int): Which axis to concat on. A negative value means
            counting dimensions from the back. Accepted range is [-r, r-1]
            where r = rank(inputs).
    Returns:
        a Tensor for the result
    """
    concat_op = Concat(axis)
    outputs = concat_op(*xs)
    return outputs[0]
| |
| |
| """ |
| def make_slice(arr, axis, i): # type: ignore |
| slc = [slice(None)] * arr.ndim |
| slc[axis] = i |
| return slc |
| """ |
| |
| |
class _Conv2d(Operator):
    """
    2D convolution operator driven by a convolution handle.
    """

    def __init__(self, handle, odd_padding=(0, 0, 0, 0)):
        """
        Args:
            handle (object): ConvHandle for cpu or CudnnConvHandle for gpu
            odd_padding (tuple of four ints): the odd padding is the value
                that cannot be handled by the tuple padding (w, h) mode, so
                the input is padded first and then the normal padding
                method is used.
        """
        super().__init__()
        self.handle = handle
        self.odd_padding = odd_padding

    def forward(self, x, W, b=None):
        """
        Args:
            x (CTensor): input
            W (CTensor): weight
            b (CTensor): bias
        Returns:
            CTensor
        """
        assert x.nDim() == 4, "The dimensions of input should be 4D."
        if self.odd_padding != (0, 0, 0, 0):
            x = utils.handle_odd_pad_fwd(x, self.odd_padding)

        has_bias = self.handle.bias_term
        if training:
            # the (padded) input and the parameters are kept for backward
            self.inputs = (x, W, b) if has_bias else (x, W)

        if not has_bias:
            # create empty bias tensor for Cpp API
            b = CTensor((self.handle.num_filters,), x.device())
            b.SetFloatValue(0.0)

        # any handle whose exact type is not ConvHandle goes to the Gpu call
        if type(self.handle) != singa.ConvHandle:
            return singa.GpuConvForward(x, W, b, self.handle)
        return singa.CpuConvForward(x, W, b, self.handle)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): dL / dy
        Returns:
            (dx, dW, db) when db is truthy, otherwise (dx, dW)
        """
        assert training is True and hasattr(
            self, "inputs"), "Please set training as True before do BP. "

        # the Gpu* names are only looked up when the handle is not a plain
        # ConvHandle
        if type(self.handle) != singa.ConvHandle:
            grad_x_fn = singa.GpuConvBackwardx
            grad_w_fn = singa.GpuConvBackwardW
            grad_b_fn = singa.GpuConvBackwardb
        else:
            grad_x_fn = singa.CpuConvBackwardx
            grad_w_fn = singa.CpuConvBackwardW
            grad_b_fn = singa.CpuConvBackwardb

        dx = grad_x_fn(dy, self.inputs[1], self.inputs[0], self.handle)
        dW = grad_w_fn(dy, self.inputs[0], self.inputs[1], self.handle)
        if self.handle.bias_term:
            db = grad_b_fn(dy, self.inputs[2], self.handle)
        else:
            db = None

        if self.odd_padding != (0, 0, 0, 0):
            dx = utils.handle_odd_pad_bwd(dx, self.odd_padding)

        return (dx, dW, db) if db else (dx, dW)
| |
| |
def conv2d(handle, x, W, b=None, odd_padding=(0, 0, 0, 0)):
    """
    Apply a _Conv2d operator; the bias is only passed on when it is given.
    Args:
        handle (object): ConvHandle for cpu or CudnnConvHandle for gpu
        x (Tensor): input
        W (Tensor): weight
        b (Tensor): bias
        odd_padding (tuple of four ints): the odd padding is the value
            that cannot be handled by the tuple padding (w, h) mode, so
            the input is padded first and then the normal padding
            method is used.
    Returns:
        the first output of the operator call
    """
    operands = (x, W) if b is None else (x, W, b)
    conv_op = _Conv2d(handle, odd_padding)
    return conv_op(*operands)[0]
| |
| |
class _BatchNorm2d(Operator):
    """
    Carries out batch normalization as described in the paper
    https://arxiv.org/abs/1502.03167.
    """

    def __init__(self, handle, running_mean, running_var, name=None):
        """
        Args:
            handle (object): BatchNormHandle for cpu and CudnnBatchNormHandle
                for gpu
            running_mean: the running mean; its `.data` attribute is kept
            running_var: the running variance; its `.data` attribute is kept
            name (string): the name assigned to this operator
        """
        super().__init__(name)
        self.handle = handle
        self.running_mean = running_mean.data
        self.running_var = running_var.data

    def forward(self, x, scale, bias):
        """
        Args:
            x (CTensor): the input tensor
            scale (CTensor): the scale tensor
            bias (CTensor): the bias tensor
        Returns:
            the result CTensor
        """
        # only a handle whose exact type is BatchNormHandle uses the Cpu calls
        use_cpu = type(self.handle) == singa.BatchNormHandle

        if not training:
            if use_cpu:
                return singa.CpuBatchNormForwardInference(
                    self.handle, x, scale, bias, self.running_mean,
                    self.running_var)
            return singa.GpuBatchNormForwardInference(
                self.handle, x, scale, bias, self.running_mean,
                self.running_var)

        if use_cpu:
            y, mean, var = singa.CpuBatchNormForwardTraining(
                self.handle, x, scale, bias, self.running_mean,
                self.running_var)
            # the Cpu backward call also takes the output and the bias
            self.cache = (x, scale, mean, var, y, bias)
        else:
            y, mean, var = singa.GpuBatchNormForwardTraining(
                self.handle, x, scale, bias, self.running_mean,
                self.running_var)
            self.cache = (x, scale, mean, var)
        return y

    def backward(self, dy):
        """
        Args:
            dy (CTensor): data for the dL / dy, L is the loss
        Returns:
            dx (CTensor): data for the dL / dx, L is the loss
            ds (CTensor): data for the dL / ds, L is the loss
            db (CTensor): data for the dL / db, L is the loss
        """
        assert training is True and hasattr(
            self, "cache"), "Please set training as True before do BP. "

        if type(self.handle) == singa.BatchNormHandle:
            x, scale, mean, var, y, bias = self.cache
            grads = singa.CpuBatchNormBackwardx(self.handle, y, dy, x, scale,
                                                bias, mean, var)
        else:
            x, scale, mean, var = self.cache
            grads = singa.GpuBatchNormBackward(self.handle, dy, x, scale, mean,
                                               var)

        dx, ds, db = grads
        return dx, ds, db
| |
| |
def batchnorm_2d(handle, x, scale, bias, running_mean, running_var):
    """
    Apply a _BatchNorm2d operator, which carries out batch normalization as
    described in the paper https://arxiv.org/abs/1502.03167.
    Args:
        handle (object): BatchNormHandle for cpu and CudnnBatchNormHandle
            for gpu
        x (Tensor): the input tensor
        scale (Tensor): the scale tensor
        bias (Tensor): the bias tensor
        running_mean: the running mean, handed to the operator constructor
        running_var: the running variance, handed to the operator constructor
    Returns:
        the result Tensor
    """
    bn_op = _BatchNorm2d(handle, running_mean, running_var)
    outputs = bn_op(x, scale, bias)
    return outputs[0]
| |
| |
class _Pooling2d(Operator):
    """
    2D pooling operator driven by a pooling handle.
    """

    def __init__(self, handle, odd_padding=(0, 0, 0, 0)):
        """
        Args:
            handle (object): PoolingHandle for cpu or CudnnPoolingHandle for
                gpu
            odd_padding (tuple of four int): the odd padding is the value
                that cannot be handled by the tuple padding (w, h) mode so
                it needs to firstly handle the input, then use the normal
                padding method.
        """
        super().__init__()
        self.handle = handle
        self.odd_padding = odd_padding

    def forward(self, x):
        """
        Args:
            x (CTensor): the input tensor
        Returns:
            the result CTensor
        """
        assert x.nDim() == 4, "The dimensions of input should be 4D."
        if self.odd_padding != (0, 0, 0, 0):
            x = utils.handle_odd_pad_fwd(x, self.odd_padding, True)

        # any handle whose exact type is not PoolingHandle goes to the Gpu call
        if type(self.handle) != singa.PoolingHandle:
            pooled = singa.GpuPoolingForward(self.handle, x)
        else:
            pooled = singa.CpuPoolingForward(self.handle, x)
        if training:
            # the (padded) input and the output are both needed by backward
            self.cache = (x, pooled)
        return pooled

    def backward(self, dy):
        """
        Args:
            dy (CTensor): data for the dL / dy, L is the loss
        Returns:
            dx (CTensor): data for the dL / dx, L is the loss,
        """
        if type(self.handle) != singa.PoolingHandle:
            grad = singa.GpuPoolingBackward(self.handle, dy, self.cache[0],
                                            self.cache[1])
        else:
            grad = singa.CpuPoolingBackward(self.handle, dy, self.cache[0],
                                            self.cache[1])
        if self.odd_padding != (0, 0, 0, 0):
            grad = utils.handle_odd_pad_bwd(grad, self.odd_padding)
        return grad
| |
| |
def pooling_2d(handle, x, odd_padding=(0, 0, 0, 0)):
    """
    Apply a _Pooling2d operator to `x`.
    Args:
        handle (object): PoolingHandle for cpu or CudnnPoolingHandle for
            gpu
        x (Tensor): input
        odd_padding (tuple of four int): the odd padding is the value
            that cannot be handled by the tuple padding (w, h) mode so
            it needs to firstly handle the input, then use the normal
            padding method.
    Returns:
        the result Tensor
    """
    pool_op = _Pooling2d(handle, odd_padding)
    outputs = pool_op(x)
    return outputs[0]
| |
| |
class Tanh(Operator):
    """
    Element-wise hyperbolic tangent of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        y = singa.Tanh(x)
        if training:
            # the output is enough to compute the gradient
            self.cache = (y,)
        return y

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        y = self.cache[0]
        # d tanh(x) / dx = 1 - tanh(x)^2
        grad = singa.AddFloat(singa.MultFloat(singa.__mul__(y, y), -1.0), 1.0)
        grad *= dy
        return grad
| |
| |
def tanh(x):
    """
    Apply a Tanh operator (element-wise hyperbolic tangent) to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Tanh()(x)
    return outputs[0]
| |
| |
class Cos(Operator):
    """
    Element-wise cosine of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # the input is needed by backward
            self.input = x
        return singa.Cos(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d cos(x) / dx = -sin(x)
        grad = singa.MultFloat(singa.Sin(self.input), -1.0)
        grad *= dy
        return grad
| |
| |
def cos(x):
    """
    Apply a Cos operator (element-wise cosine) to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Cos()(x)
    return outputs[0]
| |
| |
class Cosh(Operator):
    """
    Element-wise hyperbolic cosine of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # the input is needed by backward
            self.input = x
        return singa.Cosh(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d cosh(x) / dx = sinh(x)
        grad = singa.Sinh(self.input)
        grad *= dy
        return grad
| |
| |
def cosh(x):
    """
    Apply a Cosh operator (element-wise hyperbolic cosine) to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Cosh()(x)
    return outputs[0]
| |
| |
class Acos(Operator):
    """
    Element-wise arccosine (inverse of cosine) of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # the input is needed by backward
            self.input = x
        return singa.Acos(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d acos(x) / dx = -(1 - x^2)^(-1/2)
        one_minus_sq = singa.AddFloat(
            singa.MultFloat(singa.Square(self.input), -1.0), 1.0)
        grad = singa.MultFloat(singa.PowFloat(one_minus_sq, -0.5), -1.0)
        grad *= dy
        return grad
| |
| |
def acos(x):
    """
    Apply an Acos operator (element-wise arccosine, the inverse of cosine)
    to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Acos()(x)
    return outputs[0]
| |
| |
class Acosh(Operator):
    """
    Element-wise hyperbolic arccosine of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # the input is needed by backward
            self.input = x
        return singa.Acosh(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d acosh(x) / dx = 1 / (sqrt(x - 1) * sqrt(x + 1))
        root_minus = singa.Sqrt(singa.SubFloat(self.input, 1.0))
        root_plus = singa.Sqrt(singa.AddFloat(self.input, 1.0))
        grad = singa.PowFloat(singa.__mul__(root_minus, root_plus), -1.0)
        grad *= dy
        return grad
| |
| |
def acosh(x):
    """
    Apply an Acosh operator (element-wise hyperbolic arccosine) to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Acosh()(x)
    return outputs[0]
| |
| |
class Sin(Operator):
    """
    Element-wise sine of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # the input is needed by backward
            self.input = x
        return singa.Sin(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d sin(x) / dx = cos(x)
        grad = singa.Cos(self.input)
        grad *= dy
        return grad
| |
| |
def sin(x):
    """
    Apply a Sin operator (element-wise sine) to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Sin()(x)
    return outputs[0]
| |
| |
class Sinh(Operator):
    """
    Element-wise hyperbolic sine of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # the input is needed by backward
            self.input = x
        return singa.Sinh(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d sinh(x) / dx = cosh(x)
        grad = singa.Cosh(self.input)
        grad *= dy
        return grad
| |
| |
def sinh(x):
    """
    Apply a Sinh operator (element-wise hyperbolic sine) to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Sinh()(x)
    return outputs[0]
| |
| |
class Asin(Operator):
    """
    Element-wise arcsine (inverse of sine) of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # the input is needed by backward
            self.input = x
        return singa.Asin(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d asin(x) / dx = (1 - x^2)^(-1/2)
        one_minus_sq = singa.AddFloat(
            singa.MultFloat(singa.Square(self.input), -1.0), 1.0)
        grad = singa.PowFloat(one_minus_sq, -0.5)
        grad *= dy
        return grad
| |
| |
def asin(x):
    """
    Apply an Asin operator (element-wise arcsine, the inverse of sine)
    to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Asin()(x)
    return outputs[0]
| |
| |
class Asinh(Operator):
    """
    Element-wise hyperbolic arcsine of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # the input is needed by backward
            self.input = x
        return singa.Asinh(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d asinh(x) / dx = (x^2 + 1)^(-1/2)
        sq_plus_one = singa.AddFloat(singa.Square(self.input), 1.0)
        grad = singa.PowFloat(sq_plus_one, -0.5)
        grad *= dy
        return grad
| |
| |
def asinh(x):
    """
    Apply an Asinh operator (element-wise hyperbolic arcsine) to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Asinh()(x)
    return outputs[0]
| |
| |
class Tan(Operator):
    """
    Element-wise tangent of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # the input is needed by backward
            self.input = x
        return singa.Tan(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d tan(x) / dx = 1 / cos(x)^2
        cos_sq = singa.Square(singa.Cos(self.input))
        grad = singa.PowFloat(cos_sq, -1.0)
        grad *= dy
        return grad
| |
| |
def tan(x):
    """
    Apply a Tan operator (element-wise tangent) to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Tan()(x)
    return outputs[0]
| |
| |
class Atan(Operator):
    """
    Element-wise arctangent (inverse of tangent) of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # the input is needed by backward
            self.input = x
        return singa.Atan(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d atan(x) / dx = 1 / (x^2 + 1)
        sq_plus_one = singa.AddFloat(singa.Square(self.input), 1.0)
        grad = singa.PowFloat(sq_plus_one, -1.0)
        grad *= dy
        return grad
| |
| |
def atan(x):
    """
    Apply an Atan operator (element-wise arctangent, the inverse of
    tangent) to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Atan()(x)
    return outputs[0]
| |
| |
class Atanh(Operator):
    """
    Element-wise hyperbolic arctangent of the input tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # the input is needed by backward
            self.input = x
        return singa.Atanh(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d atanh(x) / dx = 1 / (1 - x^2)
        one_minus_sq = singa.AddFloat(
            singa.MultFloat(singa.Square(self.input), -1.0), 1.0)
        grad = singa.PowFloat(one_minus_sq, -1.0)
        grad *= dy
        return grad
| |
| |
def atanh(x):
    """
    Apply an Atanh operator (element-wise hyperbolic arctangent) to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Atanh()(x)
    return outputs[0]
| |
| |
class Sigmoid(Operator):
    """
    Applies `y = 1 / (1 + exp(-x))` to the tensor elementwise.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        y = singa.Sigmoid(x)
        if training:
            # the output is enough to compute the gradient
            self.cache = (y,)
        return y

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # d sigmoid(x) / dx = y * (1 - y)
        one_minus_y = singa.AddFloat(singa.MultFloat(self.cache[0], -1.0), 1.0)
        grad = singa.__mul__(self.cache[0], one_minus_y)
        grad *= dy
        return grad
| |
| |
def sigmoid(x):
    """
    Apply a Sigmoid operator, `y = 1 / (1 + exp(-x))` elementwise, to `x`.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    outputs = Sigmoid()(x)
    return outputs[0]
| |
| |
class Mul(Operator):
    """
    Element-wise binary multiplication (with Numpy-style broadcasting
    support).
    """

    def __init__(self):
        super().__init__()

    def forward(self, a, b):
        """
        Return `np.multiply(a,b)`, where a and b are CTensor.
        """
        # todo we cannot support mul op for int tensors
        lhs, rhs = a, b
        lhs_type = lhs.data_type()
        rhs_type = rhs.data_type()
        if lhs_type == singa.kInt or rhs_type == singa.kInt:
            # multiply as float32, then cast the product back to int
            lhs = a.AsType(singa.kFloat32)
            rhs = b.AsType(singa.kFloat32)
            product = singa.__mul__(lhs, rhs).AsType(singa.kInt)
        else:
            product = singa.__mul__(lhs, rhs)
        if training:
            # operands (after any cast) and all shapes are kept for backward
            self.input = (lhs, rhs)
            self.shape0 = list(lhs.shape())
            self.shape1 = list(rhs.shape())
            self.shape3 = list(product.shape())
        return product

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            a tuple for (da, db), da is data for dL / da, db is data
                for dL / db.
        """
        grad_a = singa.__mul__(dy, self.input[1])
        grad_b = singa.__mul__(dy, self.input[0])
        same_shape = self.shape0 == self.shape1
        if type(dy) is float or same_shape:
            assert same_shape, ('should have same shape')
            return grad_a, grad_b
        # handle broadcast: reduce each gradient back to its operand shape
        grad_a = back_broadcast(self.shape3, self.shape0, grad_a)
        grad_b = back_broadcast(self.shape3, self.shape1, grad_b)
        return grad_a, grad_b
| |
| |
def mul(x, y):
    """
    Return `np.multiply(x,y)`, where x and y are Tensor.
    """
    op = Mul()
    outputs = op(x, y)
    return outputs[0]
| |
| |
class Unsqueeze(Operator):
    """
    Insert single-dimensional entries to the shape of an input tensor (data).
    """

    def __init__(self, axis):
        """
        Args:
            axis (int or list of int): the dimensions to be inserted. A
                single int is wrapped into a one-element list. Negative
                values count from the back of the expanded shape.
        """
        super(Unsqueeze, self).__init__()
        if isinstance(axis, int):
            # `list(axis)` raises TypeError for an int; wrap it instead
            self.axis = [axis]
        else:
            self.axis = axis

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        self.cache = x.shape()
        cur = list(self.cache)
        # todo, need optimize after we have scalar tensor
        if len(self.cache) == 1 and self.axis == [0]:
            return x
        # Axes refer to positions in the expanded shape. Resolve negative
        # values against the output rank and insert in ascending order so
        # each size-1 entry lands at the requested output position.
        out_rank = len(cur) + len(self.axis)
        axes = sorted(i + out_rank if i < 0 else i for i in self.axis)
        for i in axes:
            cur.insert(i, 1)
        return singa.Reshape(x, cur)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient reshaped to the cached input shape
        """
        return singa.Reshape(dy, self.cache)
| |
| |
def unsqueeze(x, axis=-1):
    """
    Insert single-dimensional entries to the shape of an input tensor (data).
    Args:
        x (Tensor): Input tensor
        axis (int or list of int): the dimensions to be inserted; passed
            unchanged to the `Unsqueeze` operator. Defaults to -1.
    Returns:
        Tensor, the output
    """
    op = Unsqueeze(axis)
    outputs = op(x)
    return outputs[0]
| |
| |
class Transpose(Operator):
    """
    Permute the axes of the input tensor, similar to numpy.transpose.
    """

    def __init__(self, perm):
        """
        Args:
            perm (list of ints): the axis permutation to apply; it is copied
                into a list.
        """
        super(Transpose, self).__init__()
        self.perm = list(perm)

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        return singa.Transpose(x, self.perm)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # undo the forward permutation: position i receives the index at
        # which axis i appears in `perm`
        inverse = [self.perm.index(i) for i in range(len(self.perm))]
        return singa.Transpose(dy, inverse)
| |
| |
def transpose(x, shape):
    """
    Transpose the input tensor similar to numpy.transpose.
    Args:
        x (Tensor): Input tensor
        shape (list of ints): the axis permutation, handed to the
            `Transpose` operator as its `perm` argument.
    Returns:
        Tensor, the output
    """
    op = Transpose(shape)
    outputs = op(x)
    return outputs[0]
| |
| |
def add_all(*xs):
    """
    Sum all the given tensors by chaining `add` from left to right.
    Args:
        *xs (a list of Tensor): at least two tensors to be added.
    Returns:
        Tensor, the accumulated sum
    Raises:
        AssertionError: if fewer than two tensors are given
    """
    assert len(xs) >= 2
    y = add(xs[0], xs[1])
    for x in xs[2:]:
        y = add(y, x)
    # the accumulated sum must be handed back to the caller
    return y
| |
| |
class Abs(Operator):
    """
    Element-wise absolute value, `y = abs(x)`.
    """

    def forward(self, a):
        """
        Return `abs(a)`, where a is CTensor.
        """
        if training:
            # the input is needed to compute its sign in backward
            self.input = a
        return singa.Abs(a)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input, `sign(input) * dy`
        """
        grad = singa.Sign(self.input)
        grad *= dy
        return grad
| |
| |
def abs(a):
    """
    Return abs(a), where a is Tensor.
    """
    op = Abs()
    outputs = op(a)
    return outputs[0]
| |
| |
class Exp(Operator):
    """
    Element-wise exponential, `y = exp(x)`.
    """

    def forward(self, a):
        """
        Return `exp(a)`, where a is CTensor.
        """
        if training:
            self.input = a
        return singa.Exp(a)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input, `exp(input) * dy`
        """
        grad = singa.Exp(self.input)
        grad *= dy
        return grad
| |
| |
def exp(a):
    """
    Return `exp(a)`, where a is Tensor.
    """
    op = Exp()
    outputs = op(a)
    return outputs[0]
| |
| |
class LeakyRelu(Operator):
    """
    `f(x) = alpha * x` for x < 0, `f(x) = x` for x >= 0, is applied to the
    tensor elementwise.
    """

    def __init__(self, a):
        """
        Args:
            a (float): Coefficient of leakage.
        """
        super(LeakyRelu, self).__init__()
        self.a = a

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            self.input = x
        # negative branch: keep only the x < 0 entries and scale them by a
        below_zero = singa.LTFloat(x, 0.0)
        leaked = singa.__mul__(x, below_zero)
        leaked = singa.MultFloat(leaked, self.a)
        # positive branch is a plain ReLU
        rectified = singa.ReLU(x)
        return singa.__add__(leaked, rectified)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # TODO(wangwei) check the correctness
        # slope is 1 where input > 0 and `a` where input < 0
        pos_slope = singa.GTFloat(self.input, 0.0)
        neg_slope = singa.MultFloat(singa.LTFloat(self.input, 0.0), self.a)
        grad = singa.__add__(pos_slope, neg_slope)
        grad *= dy
        return grad
| |
| |
def leakyrelu(x, a=0.01):
    """
    `f(x) = alpha * x` for x < 0, `f(x) = x` for x >= 0 is applied to the tensor
    elementwise.
    Args:
        x (Tensor): Input tensor
        a (float): Coefficient of leakage, default to 0.01.
    Returns:
        Tensor, the output
    """
    op = LeakyRelu(a)
    outputs = op(x)
    return outputs[0]
| |
| |
class Sign(Operator):
    """
    Element-wise sign of the input tensor: 1 for input > 0, -1 for
    input < 0 and 0 for input == 0.
    """

    def __init__(self):
        super(Sign, self).__init__()

    def forward(self, a):
        """
        Args:
            a (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # stored while training; backward below does not read it
            self.input = a
        return singa.Sign(a)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input, computed as `dy * 0.0`
        """
        return singa.MultFloat(dy, 0.0)
| |
| |
def sign(a):
    """
    Calculate the sign of the given input tensor element-wise. If input > 0,
    output 1. if input < 0, output -1. if input == 0, output 0.
    Args:
        a (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    op = Sign()
    outputs = op(a)
    return outputs[0]
| |
| |
class Pow(Operator):
    """
    `f(x) = a^b`, is applied to the tensor elementwise.
    """

    def __init__(self):
        super(Pow, self).__init__()

    def forward(self, a, b):
        """
        Return `a^b`, where a and b are CTensor.
        """
        res = singa.Pow(a, b)
        if training:
            # operands and shapes are needed by backward
            self.input = (a, b)
            self.shape0 = list(a.shape())
            self.shape1 = list(b.shape())
            self.shape3 = list(res.shape())
        return res

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            a tuple for (da, db), da is data for dL / da, db is data
            for dL / db.
        """
        # d(a^b)/da = b * a^(b-1)
        exponent_minus_one = singa.SubFloat(self.input[1], 1.0)
        base_term = singa.Pow(self.input[0], exponent_minus_one)
        grad_a = singa.__mul__(singa.__mul__(self.input[1], base_term), dy)
        # d(a^b)/db = a^b * log(a)
        power = singa.Pow(self.input[0], self.input[1])
        log_base = singa.Log(self.input[0])
        grad_b = singa.__mul__(singa.__mul__(power, log_base), dy)
        same_shape = self.shape0 == self.shape1
        if type(dy) == float or same_shape:
            assert same_shape, ('should have same shape')
            return grad_a, grad_b
        # operands were broadcast: reduce each gradient to its input shape
        grad_a = back_broadcast(self.shape3, self.shape0, grad_a)
        grad_b = back_broadcast(self.shape3, self.shape1, grad_b)
        return grad_a, grad_b
| |
| |
def pow(a, b):
    """
    Return `a^b`, where a and b are Tensor.
    """
    op = Pow()
    outputs = op(a, b)
    return outputs[0]
| |
| |
class SoftSign(Operator):
    """
    Element-wise softsign, `y = x / (1 + |x|)`.
    """

    def __init__(self):
        super(SoftSign, self).__init__()

    def forward(self, x):
        """
        Return `(x/(1+|x|))`, where x is CTensor.
        """
        # y = x / (1 + np.abs(x))
        if training:
            self.input = x
        denominator = singa.AddFloat(singa.Abs(x), 1.0)
        return singa.__div__(x, denominator)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input, `dy / (1 + |input|)^2`
        """
        denominator = singa.AddFloat(singa.Abs(self.input), 1.0)
        slope = singa.PowFloat(singa.Square(denominator), -1.0)
        return singa.__mul__(dy, slope)
| |
| |
def softsign(x):
    """
    Return `(x/(1+|x|))`, where x is Tensor.
    """
    op = SoftSign()
    outputs = op(x)
    return outputs[0]
| |
| |
class Sqrt(Operator):
    """
    Element-wise square root, `y = x^0.5`.
    """

    def __init__(self):
        super(Sqrt, self).__init__()

    def forward(self, x):
        """
        Return `x^0.5`, where x is CTensor.
        """
        if training:
            self.input = x
        return singa.Sqrt(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input, `dy * 0.5 * input^(-0.5)`
        """
        slope = singa.MultFloat(singa.PowFloat(self.input, -0.5), 0.5)
        return singa.__mul__(dy, slope)
| |
| |
def sqrt(x):
    """
    Return `x^0.5`, where x is Tensor.
    """
    op = Sqrt()
    outputs = op(x)
    return outputs[0]
| |
| |
class SoftPlus(Operator):
    """
    Element-wise softplus, `y = ln(exp(x) + 1)`.
    """

    def __init__(self):
        super(SoftPlus, self).__init__()

    def forward(self, x):
        """
        Return `ln(exp(x) + 1)`, where x is CTensor.
        """
        #f(x) = ln(exp(x) + 1)
        if training:
            self.input = x
        shifted = singa.AddFloat(singa.Exp(x), 1.0)
        return singa.Log(shifted)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input, `dy / (1 + exp(-input))`
        """
        exp_neg = singa.Exp(singa.MultFloat(self.input, -1.0))
        slope = singa.PowFloat(singa.AddFloat(exp_neg, 1.0), -1.0)
        return singa.__mul__(dy, slope)
| |
| |
def softplus(x):
    """
    Return `ln(exp(x) + 1)`, where x is Tensor.
    """
    op = SoftPlus()
    outputs = op(x)
    return outputs[0]
| |
| |
class Sub(Operator):
    """
    Element-wise difference of two tensors (with Numpy-style broadcasting
    support).
    """

    def __init__(self):
        super(Sub, self).__init__()

    def forward(self, a, b):
        """
        Return `a-b`, where a and b are CTensor.

        When `a` is not `singa.kFloat32`, both operands are cast to
        `singa.kFloat32` for the subtraction and the result is cast back to
        the original data type of `a`.
        """
        a_type = a.data_type()
        restore_type = None
        if a_type != singa.kFloat32:
            restore_type = a.data_type()
            a = a.AsType(singa.kFloat32)
            b = b.AsType(singa.kFloat32)
        res = singa.__sub__(a, b)
        if restore_type is not None:
            res = res.AsType(restore_type)
        if training:
            # only the shapes are needed by backward
            self.shape0 = list(a.shape())
            self.shape1 = list(b.shape())
            self.shape3 = list(res.shape())
        return res

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            a tuple for (da, db), da is data for dL / da, db is data
            for dL / db.
        """
        grad_a = dy
        grad_b = singa.MultFloat(dy, -1.0)
        same_shape = self.shape0 == self.shape1
        if type(dy) == float or same_shape:
            assert same_shape, ('should have same shape')
            return grad_a, grad_b
        # operands were broadcast: reduce each gradient to its input shape
        grad_a = back_broadcast(self.shape3, self.shape0, grad_a)
        grad_b = back_broadcast(self.shape3, self.shape1, grad_b)
        return grad_a, grad_b
| |
| |
def sub(a, b):
    """
    Return a-b, where a and b are Tensor.
    """
    op = Sub()
    outputs = op(a, b)
    return outputs[0]
| |
| |
| # optimize min to support multi inputs |
class Min(Operator):
    """
    Element-wise min of each of the input tensors (with Numpy-style
    broadcasting support).
    """

    def __init__(self):
        super(Min, self).__init__()
        self.masks = []

    def _min(self, a, b):
        """
        Pairwise element-wise minimum.
        Args:
            a (CTensor): First operand
            b (CTensor): Second operand
        Returns:
            CTensor, the output
            tuple of CTensor, (mask0, mask1): mask0 marks where `a` was
                selected (a - b <= 0), mask1 where `b` was selected
        """
        m = singa.__sub__(a, b)
        mask0 = singa.LEFloat(m, 0)
        mask1 = singa.GTFloat(m, 0)
        res = singa.__add__(singa.__mul__(mask0, a), singa.__mul__(mask1, b))
        return res, (mask0, mask1)

    def forward(self, *x):
        """
        Args:
            *x (a list of CTensor): List of tensors for min.
        Returns:
            CTensor, the output
        """
        assert (len(x) > 0)
        self.l = len(x)
        # drop masks from any earlier call so backward only sees this pass
        self.masks = []
        if len(x) == 1:
            _, masks = self._min(x[0], x[0])
            self.masks.append(masks)
            return x[0]
        res, masks = self._min(x[0], x[1])
        self.masks.append(masks)
        for i in range(2, len(x)):
            res, masks = self._min(res, x[i])
            self.masks.append(masks)
        return res

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            a tuple for (*dx), dx is data for dL / dx: the selection mask
            of each input multiplied by dy. A single CTensor is returned
            when there was only one input. Gradients are not reduced back
            to the individual input shapes.
        """
        if self.l == 1:
            return singa.__mul__(self.masks[0][0], dy)
        ret = []
        cumulation = None
        # Walk the pairwise steps backwards. `cumulation` is the mask of
        # positions where the running result survived all later steps.
        for mask0, mask1 in self.masks[::-1]:
            if cumulation is None:
                ret.insert(0, mask1)
                cumulation = mask0
            else:
                ret.insert(0, singa.__mul__(cumulation, mask1))
                cumulation = singa.__mul__(cumulation, mask0)
        ret.insert(0, cumulation)
        # chain rule: scale every selection mask by the upstream gradient
        return tuple(singa.__mul__(mask, dy) for mask in ret)
| |
| |
def min(*l):
    """
    Element-wise min of each of the input tensors (with Numpy-style
    broadcasting support).
    Args:
        *l (a list of Tensor): List of tensors for min.
    Returns:
        Tensor, the output
    """
    op = Min()
    outputs = op(*l)
    return outputs[0]
| |
| |
class Log(Operator):
    """
    Element-wise logarithm, `y = log(x)`.
    """

    def __init__(self):
        super(Log, self).__init__()

    def forward(self, x):
        """
        Return `log(x)`, where x is CTensor.
        """
        if training:
            self.input = x
        return singa.Log(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input, `dy * input^(-1)`
        """
        reciprocal_input = singa.PowFloat(self.input, -1)
        return singa.__mul__(dy, reciprocal_input)
| |
| |
def log(x):
    """
    Return log(x), where x is Tensor.
    """
    op = Log()
    outputs = op(x)
    return outputs[0]
| |
| |
class HardSigmoid(Operator):
    """
    `y = max(0, min(1, alpha * x + beta))`, is applied to the tensor
    elementwise. The `gamma` attribute plays the role of beta.
    """

    def __init__(self, alpha=0.2, gamma=0.5):
        """
        Args:
            alpha (float): Value of alpha (the slope).
            gamma (float): Value of beta (the offset added after scaling).
        """
        super(HardSigmoid, self).__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, x):
        """
        Args:
            x (CTensor): matrix
        Returns:
            a CTensor for the result
        """
        affine = singa.AddFloat(singa.MultFloat(x, self.alpha), self.gamma)
        if training:
            # backward needs the affine value before clipping
            self.cache = affine

        lower_clipped = singa.ReLU(affine)
        below_one = singa.LTFloat(lower_clipped, 1.0)
        at_least_one = singa.GEFloat(lower_clipped, 1.0)

        # keep values below 1 as they are and replace the rest with 1
        clipped = singa.__add__(singa.__mul__(lower_clipped, below_one),
                                at_least_one)
        return singa.ReLU(clipped)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # gradient is alpha where 0 < alpha * x + gamma < 1, zero elsewhere
        above_zero = singa.GTFloat(self.cache, 0.0)
        below_one = singa.LTFloat(self.cache, 1.0)
        active = singa.__mul__(above_zero, below_one)
        return singa.__mul__(singa.MultFloat(active, self.alpha), dy)
| |
| |
def hardsigmoid(x, alpha=0.2, gamma=0.5):
    """
    `y = max(0, min(1, alpha * x + beta))`, is applied to the tensor elementwise.
    Args:
        x (Tensor): matrix
        alpha (float): Value of alpha.
        gamma (float): Value of beta.
    Returns:
        a Tensor for the result
    """
    op = HardSigmoid(alpha, gamma)
    outputs = op(x)
    return outputs[0]
| |
| |
class Squeeze(Operator):
    """
    Remove single-dimensional entries from the shape of a tensor. Takes a
    parameter axes with a list of axes to squeeze. If axes is not provided,
    all the single dimensions will be removed from the shape. If an axis is
    selected with shape entry not equal to one, an error is raised.
    """

    def __init__(self, axis=None):
        """
        Args:
            axis (int or list of ints): Dimensions to squeeze. Negative
                value means counting dimensions from the back. Accepted
                range is [-r, r-1] where r = rank(data). `None` or an
                empty list squeezes every dimension of size 1.
        """
        super(Squeeze, self).__init__()
        if axis is None:
            axis = []
        elif isinstance(axis, int):
            axis = [axis]
        # private copy: forward rewrites the stored axes and must not touch
        # the caller's list
        self.axis = list(axis)

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        Raises:
            AssertionError: if an axis is out of range or the selected
                dimension is not of size 1
        """
        self.cache = x.shape()
        rank = len(self.cache)
        if not self.axis:
            newshape = [dim for dim in self.cache if dim != 1]
        else:
            normalized = []
            for i in self.axis:
                assert -rank <= i < rank
                assert self.cache[
                    i] == 1, "the length of axis {} is {}, which should be 1".format(
                        i, self.cache[i])
                normalized.append(i % rank)
            # keep the non-negative form of the axes on the operator
            self.axis = normalized
            newshape = [
                dim for ind, dim in enumerate(self.cache)
                if ind not in normalized
            ]
        # todo, need optimize after we have scalar tensor
        if newshape == []:
            return x
        return singa.Reshape(x, newshape)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient reshaped to the cached input shape
        """
        return singa.Reshape(dy, self.cache)
| |
| |
def squeeze(x, axis=[]):
    """
    Remove single-dimensional entries from the shape of a tensor. Takes a
    parameter axes with a list of axes to squeeze. If axes is not provided,
    all the single dimensions will be removed from the shape. If an axis is
    selected with shape entry not equal to one, an error is raised.
    Args:
        x (Tensor): Input tensor
        axis (list of ints): List of integers indicating the dimensions
            to squeeze. Negative value means counting dimensions from
            the back. Accepted range is [-r, r-1] where r = rank(data).
    Returns:
        Tensor, the output
    """
    op = Squeeze(axis)
    outputs = op(x)
    return outputs[0]
| |
| |
class Div(Operator):
    """
    Element-wise quotient of two tensors (with Numpy-style broadcasting
    support).
    """

    def __init__(self):
        super(Div, self).__init__()

    def forward(self, a, b):
        """
        Return `np.div(a,b)`, where a and b are CTensor.

        The quotient is computed as `a * b^(-1)`. When `a` is not
        `singa.kFloat32`, both operands are cast to `singa.kFloat32` first
        and the result is cast back to the original data type of `a`.
        """
        a_type = a.data_type()
        restore_type = None
        if a_type != singa.kFloat32:
            restore_type = a.data_type()
            a = a.AsType(singa.kFloat32)
            b = b.AsType(singa.kFloat32)
        inverse_b = singa.PowFloat(b, -1.0)
        res = singa.__mul__(a, inverse_b)
        if restore_type is not None:
            res = res.AsType(restore_type)
        if training:
            # cache -a and 1/b, the two factors backward works with
            negated_a = singa.MultFloat(a, -1.0)
            reciprocal_b = singa.PowFloat(b, -1.0)
            self.input = (negated_a, reciprocal_b)
            self.shape0 = list(a.shape())
            self.shape1 = list(b.shape())
            self.shape3 = list(res.shape())
        return res

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            a CTensor tuple for (da, db), da is data for dL / da, db is data
            for dL / db.
        """
        #dy/dx_0 = b^(-1)
        #dy/dx_1 = (-a)*b^(-2)
        grad_a = singa.__mul__(dy, self.input[1])
        local_b = singa.__mul__(self.input[0],
                                singa.PowFloat(self.input[1], 2.0))
        grad_b = singa.__mul__(dy, local_b)
        same_shape = self.shape0 == self.shape1
        if type(dy) == float or same_shape:
            assert same_shape, ('should have same shape')
            return grad_a, grad_b
        # operands were broadcast: reduce each gradient to its input shape
        grad_a = back_broadcast(self.shape3, self.shape0, grad_a)
        grad_b = back_broadcast(self.shape3, self.shape1, grad_b)
        return grad_a, grad_b
| |
| |
def div(a, b):
    """
    Return `np.div(a,b)`, where a and b are Tensor.
    """
    op = Div()
    outputs = op(a, b)
    return outputs[0]
| |
| |
class Shape(Operator):
    """
    Output a tensor that holds the shape of the input tensor.
    """

    def __init__(self):
        super(Shape, self).__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): Input tensor
        Returns:
            CTensor, the output
        """
        dims = np.array(list(x.shape()))
        shape_tensor = tensor.from_numpy(dims)
        # place the result on the same device as the input
        shape_tensor.to_device(x.device())
        return shape_tensor.data

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            list of int, the shape of dy
        """
        return list(dy.shape())
| |
| |
def shape(x):
    """
    Takes a tensor as input and outputs a tensor containing the shape of the
    input tensor.
    Args:
        x (Tensor): Input tensor
    Returns:
        Tensor, the output
    """
    op = Shape()
    outputs = op(x)
    return outputs[0]
| |
| |
| # optimize max to support multi inputs |
class Max(Operator):
    """
    Element-wise max of each of the input tensors (with Numpy-style
    broadcasting support).
    """

    def __init__(self):
        super(Max, self).__init__()
        self.masks = []

    def _max(self, a, b):
        """
        Pairwise element-wise maximum.
        Args:
            a (CTensor): First operand
            b (CTensor): Second operand
        Returns:
            CTensor, the output
            tuple of CTensor, (mask0, mask1): mask0 marks where `a` was
                selected (a - b >= 0), mask1 where `b` was selected
        """
        m = singa.__sub__(a, b)
        mask0 = singa.GEFloat(m, 0)
        mask1 = singa.LTFloat(m, 0)
        res = singa.__add__(singa.__mul__(mask0, a), singa.__mul__(mask1, b))
        return res, (mask0, mask1)

    def forward(self, *x):
        """
        Args:
            *x (a list of CTensor): List of tensors for max.
        Returns:
            CTensor, the output
        """
        assert (len(x) > 0)
        self.l = len(x)
        # drop masks from any earlier call so backward only sees this pass
        self.masks = []
        if len(x) == 1:
            _, masks = self._max(x[0], x[0])
            self.masks.append(masks)
            return x[0]
        res, masks = self._max(x[0], x[1])
        self.masks.append(masks)
        for i in range(2, len(x)):
            res, masks = self._max(res, x[i])
            self.masks.append(masks)
        return res

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            a tuple for (*dx), dx is data for dL / dx: the selection mask
            of each input multiplied by dy. A single CTensor is returned
            when there was only one input. Gradients are not reduced back
            to the individual input shapes.
        """
        if self.l == 1:
            return singa.__mul__(self.masks[0][0], dy)
        ret = []
        cumulation = None
        # Walk the pairwise steps backwards. `cumulation` is the mask of
        # positions where the running result survived all later steps.
        for mask0, mask1 in self.masks[::-1]:
            if cumulation is None:
                ret.insert(0, mask1)
                cumulation = mask0
            else:
                ret.insert(0, singa.__mul__(cumulation, mask1))
                cumulation = singa.__mul__(cumulation, mask0)
        ret.insert(0, cumulation)
        # chain rule: scale every selection mask by the upstream gradient
        return tuple(singa.__mul__(mask, dy) for mask in ret)
| |
| |
def max(*l):
    """
    Element-wise max of each of the input tensors (with Numpy-style broadcasting support).
    Args:
        *l (a list of Tensor): List of tensors for max.
    Returns:
        Tensor, the output
    """
    op = Max()
    outputs = op(*l)
    return outputs[0]
| |
| |
class And(Operator):
    """
    Element-wise logical and of the input tensors A and B (with Numpy-style
    broadcasting support).
    """

    def __init__(self):
        super(And, self).__init__()

    def forward(self, a, b):
        """
        Return `np.logical_and(a,b)`, where a and b are CTensor.
        """
        # the product is non-zero only where both operands are non-zero;
        # squaring its sign gives 1 there and 0 elsewhere
        product = singa.__mul__(a, b)
        return singa.PowFloat(singa.Sign(product), 2)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Raises:
            AssertionError: no backward function for this operator
        """
        assert False, ('no gradient for backward function')
| |
| |
def _and(a, b):
    """
    Return `np.logical_and(a,b)`, where a and b are Tensor.
    """
    op = And()
    outputs = op(a, b)
    return outputs[0]
| |
| |
class Or(Operator):
    """
    Element-wise logical or of the input tensors A and B (with Numpy-style
    broadcasting support).
    """

    def __init__(self):
        super(Or, self).__init__()

    def forward(self, a, b):
        """
        Return `np.logical_or(a,b)`, where a and b are CTensor.
        """
        # sign(x)^2 is 1 for non-zero x and 0 otherwise; the sum is
        # positive when at least one operand is non-zero
        a_nonzero = singa.PowFloat(singa.Sign(a), 2.0)
        b_nonzero = singa.PowFloat(singa.Sign(b), 2.0)
        total = singa.__add__(a_nonzero, b_nonzero)
        return singa.Sign(total)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): data for the `dL / dy`, L is the loss.
        Raises:
            AssertionError: no backward function for this operator
        """
        assert False, ('no gradient for backward function')
| |
| |
def _or(a, b):
    """
    Return np.logical_or(a,b), where a and b are Tensor.
    """
    op = Or()
    outputs = op(a, b)
    return outputs[0]
| |
| |
class Not(Operator):
    """
    Element-wise logical negation of the input tensor.
    """

    def __init__(self):
        super(Not, self).__init__()

    def forward(self, x):
        """
        Return `np.logical_not(x)`, where x is CTensor.
        """
        # an element is exactly zero when it is both >= 0 and <= 0
        non_negative = singa.GEFloat(x, 0)
        non_positive = singa.LEFloat(x, 0)
        return singa.__mul__(non_negative, non_positive)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Raises:
            AssertionError: no backward function for this operator
        """
        assert False, ('no gradient for backward function')
| |
| |
def _not(x):
    """
    Return `np.logical_not(x)`, where x is Tensor.
    """
    op = Not()
    outputs = op(x)
    return outputs[0]
| |
| |
class Xor(Operator):
    """
    Element-wise logical xor of the input tensors A and B (with Numpy-style
    broadcasting support).
    """

    def __init__(self):
        super(Xor, self).__init__()

    def forward(self, a, b):
        """
        Return `np.logical_xor(a,b)`, where a and b are CTensor.
        """
        # sign(x)^2 is 1 for non-zero x and 0 otherwise; the difference is
        # non-zero only when exactly one operand is non-zero
        a_nonzero = singa.PowFloat(singa.Sign(a), 2.0)
        b_nonzero = singa.PowFloat(singa.Sign(b), 2.0)
        difference = singa.__sub__(a_nonzero, b_nonzero)
        return singa.PowFloat(singa.Sign(difference), 2.0)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Raises:
            AssertionError: no backward function for this operator
        """
        assert False, ('no gradient for backward function')
| |
| |
def _xor(a, b):
    """
    Return `np.logical_xor(a,b)`, where a and b are Tensor.
    """
    op = Xor()
    outputs = op(a, b)
    return outputs[0]
| |
| |
class Negative(Operator):
    """
    Element-wise negation, `y = -x`.
    """

    def __init__(self):
        super(Negative, self).__init__()

    def forward(self, x):
        """
        Return `-x`, where x is CTensor.
        """
        #y=-x
        negated = singa.MultFloat(x, -1)
        return negated

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input, `-dy`
        """
        negated_grad = singa.MultFloat(dy, -1)
        return negated_grad
| |
| |
def negative(x):
    """
    Return `-x`, where x is Tensor.
    """
    op = Negative()
    outputs = op(x)
    return outputs[0]
| |
| |
class Reciprocal(Operator):
    """
    Element-wise reciprocal, `y = 1/x`.
    """

    def __init__(self):
        super(Reciprocal, self).__init__()

    def forward(self, x):
        """
        Return `1/x`, where x is CTensor.
        """
        #y=1/x elementwise
        if training:
            self.input = x

        return singa.PowFloat(x, -1)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input, `dy * (-1 / input^2)`
        """
        #dy/dx = -1/x**2
        inverse_square = singa.PowFloat(self.input, -2)
        slope = singa.MultFloat(inverse_square, -1)
        return singa.__mul__(dy, slope)
| |
| |
def reciprocal(x):
    """
    Return 1/x, where x is Tensor.
    """
    op = Reciprocal()
    outputs = op(x)
    return outputs[0]
| |
| |
class Gemm(Operator):
    """
    Init a General Matrix multiplication(Gemm) operator. Compute `Y = alpha *
    A' * B' + beta * C`, where input tensor A has shape (M, K) or (K, M), input
    tensor B has shape (K, N) or (N, K), input tensor C is broadcastable to
    shape (M, N), and output tensor Y has shape (M, N).
    `A' = transpose(A)` if transA else A
    `B' = transpose(B)` if transB else B
    """

    def __init__(self, alpha=1.0, beta=1.0, transA=0, transB=0):
        """
        Args:
            alpha (float): Scalar multiplier for the product of input tensors
                A * B.
            beta (float): Scalar multiplier for input tensor C.
            transA (int): Whether A should be transposed (non-zero means yes)
            transB (int): Whether B should be transposed (non-zero means yes)
        """
        super(Gemm, self).__init__()
        self.alpha = alpha
        self.beta = beta
        self.transA = transA
        self.transB = transB

    def forward(self, A, B, C=None):
        """
        forward propagation of Gemm
        Args:
            A (CTensor): The shape of A should be (M, K) if transA is 0, or
                (K, M) if transA is non-zero.
            B (CTensor): The shape of B should be (K, N) if transB is 0, or
                (N, K) if transB is non-zero.
            C (CTensor): (optional), Optional input tensor C. If not specified,
                the computation is done as if C is a scalar 0. The shape of C
                should be unidirectional broadcastable to (M, N).
        Returns:
            tensor, the output
        """
        # Any non-zero flag transposes, matching the checks in backward.
        _A = singa.DefaultTranspose(A) if self.transA else A
        _B = singa.DefaultTranspose(B) if self.transB else B
        if training:
            # cache the already-transposed operands for backward
            self.inputs = (_A, _B, C)
        tmpM = singa.MultFloat(singa.Mult(_A, _B), self.alpha)
        if C is not None:
            tmpM = singa.__add__(tmpM, singa.MultFloat(C, self.beta))
        return tmpM

    def backward(self, dy):
        """
        backward propagation of Gemm
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over A
            CTensor, the gradient over B
            CTensor(optional), the gradient over C, only when C was given
        """
        _A, _B, C = self.inputs
        # y = alpha * A * B => da = alpha * dy * BT
        # y = alpha * A * BT => da = alpha * dy * B
        # y = alpha * AT * B => da = alpha * B * dyT = alpha * (dy * BT)T
        # y = alpha * AT * BT => da = alpha * BT * dyT = alpha * (dy * B)T
        da = singa.MultFloat(singa.Mult(dy, singa.DefaultTranspose(_B)),
                             self.alpha)
        if self.transA:
            da = singa.DefaultTranspose(da)

        # y = alpha * A * B => db = alpha * AT * dy
        # y = alpha * AT * B => db = alpha * A * dy
        # y = alpha * A * BT => db = alpha * dyT * A = alpha * (AT * dy)T
        # y = alpha * AT * BT => db = alpha * dyT * AT = alpha * (A * dy)T
        db = singa.MultFloat(singa.Mult(singa.DefaultTranspose(_A), dy),
                             self.alpha)
        if self.transB:
            db = singa.DefaultTranspose(db)
        if C is None:
            return da, db
        # C may have been broadcast: reduce beta * dy back to C's shape
        dc = back_broadcast(dy.shape(), C.shape(),
                            singa.MultFloat(dy, self.beta))
        return da, db, dc
| |
| |
def gemm(A, B, C=None, alpha=1.0, beta=1.0, transA=0, transB=0):
    """
    Init a General Matrix multiplication(Gemm) operator. Compute `Y = alpha *
    A' * B' + beta * C`, where input tensor A has shape (M, K) or (K, M), input
    tensor B has shape (K, N) or (N, K), input tensor C is broadcastable to
    shape (M, N), and output tensor Y has shape (M, N).
    `A' = transpose(A)` if transA else A
    `B' = transpose(B)` if transB else B
    Args:
        A (Tensor): The shape of A should be (M, K) if transA is 0, or
            (K, M) if transA is non-zero.
        B (Tensor): The shape of B should be (K, N) if transB is 0, or
            (N, K) if transB is non-zero.
        C (Tensor): (optional), Optional input tensor C. If not specified,
            the computation is done as if C is a scalar 0. The shape of C
            should be unidirectional broadcastable to (M, N).
        alpha (float): Scalar multiplier for the product of input tensors A * B.
        beta (float): Scalar multiplier for input tensor C.
        transA (int): Whether A should be transposed
        transB (int): Whether B should be transposed
    Returns:
        Tensor, the output
    """
    op = Gemm(alpha, beta, transA, transB)
    # test for presence explicitly instead of relying on tensor truthiness
    if C is not None:
        return op(A, B, C)[0]
    return op(A, B)[0]
| |
| |
class GlobalAveragePool(Operator):
    """
    Init a GlobalAveragePool operator
    """

    def __init__(self, data_format='channels_first'):
        """
        Args:
            data_format (string): A string, we support two formats:
                channels_last and channels_first, default is channels_first.
                channels_first means the format of input is (N x C x H x W)
                channels_last means the format of input is (N x H x W x C)
        """
        super(GlobalAveragePool, self).__init__()
        self.data_format = data_format

    def forward(self, x):
        """
        forward propogation of GlobalAveragePool
        Args:
            x (CTensor): the input tensor
        Returns:
            CTensor, the output
        """
        if training:
            # input-shaped buffer that backward fills with the divisor
            self.mask = singa.Tensor(x.shape(), x.device())

        dims = list(x.shape())
        rank = len(dims)

        if self.data_format == 'channels_first':
            # (N x C x H x W): pool over everything after the channel axis
            axes = tuple(range(2, rank))
            self.shape_divisor = 1 / np.prod(dims[2:])
        else:
            # (N x H x W x C): pool over everything between batch and channel
            axes = tuple(range(1, rank - 1))
            self.shape_divisor = 1 / np.prod(dims[1:-1])

        # output shape
        # (N x C x 1 x 1) for channels_first
        # (N x 1 x 1 x C) for channels_last
        for pooled_axis in axes:
            dims[pooled_axis] = 1

        summed = tensor.sum(tensor.from_raw_tensor(x), axis=axes)
        summed = tensor.reshape(summed, dims)
        return singa.MultFloat(summed.data, self.shape_divisor)

    def backward(self, dy):
        """
        backward propogation of GlobalAveragePool
        Args:
            dy (CTensor): the gradient tensor from upper operations
        Returns:
            CTensor, the gradient over input
        """
        # every pooled element gets the same share of the gradient
        self.mask.SetFloatValue(self.shape_divisor)
        return singa.__mul__(self.mask, dy)
| |
| |
def globalaveragepool(x, data_format='channels_first'):
    """
    Apply a GlobalAveragePool operator to `x`.
    Args:
        x (Tensor): the input tensor
        data_format (string): layout of the input, either 'channels_first'
            (N x C x H x W, the default) or 'channels_last'
            (N x H x W x C).
    Returns:
        Tensor, the first output of the operator
    """
    op = GlobalAveragePool(data_format)
    outputs = op(x)
    return outputs[0]
| |
| |
class ConstantOfShape(Operator):
    """
    Operator that builds a tensor of a requested shape filled with a single
    constant value.
    """

    def __init__(self, value=0.):
        """
        Args:
            value (float): (Optional) the fill value of the output elements.
                Defaults to 0.
        """
        super(ConstantOfShape, self).__init__()
        self.value = value

    def forward(self, x):
        """
        forward of ConstantOfShape
        Args:
            x: CTensor whose elements give the shape of the output tensor.
                All values must be >= 0.
        Returns:
            a new CTensor of that shape, on the device of `x`, with every
            element set to `self.value`.
        """
        # the tensor's contents (not its shape) describe the output shape
        shape_values = tensor.to_numpy(tensor.from_raw_tensor(x))
        out_shape = shape_values.astype(np.int64).tolist()
        assert np.min(out_shape) >= 0, ('shape cannot be negative')
        filled = CTensor(out_shape, x.device())
        filled.SetFloatValue(self.value)
        return filled

    def backward(self, dy):
        """
        backward of ConstantOfShape
        Args:
            dy (CTensor): gradient tensor.
        Raises:
            AssertionError: always, this operator has no gradient
        """
        assert False, ('no gradient for backward function')
| |
| |
def constant_of_shape(x, value=0):
    """
    Apply a ConstantOfShape operator: generate a tensor with the given value
    and shape.
    Args:
        x: Tensor whose elements give the shape of the expected output
            tensor. All values must be >= 0.
        value (float): (Optional) the fill value of the output elements.
            Defaults to 0.
    Returns:
        the output Tensor (first output of the operator), filled with
        `value`.
    """
    op = ConstantOfShape(value)
    outputs = op(x)
    return outputs[0]
| |
| |
class Dropout(Operator):
    """
    Init a Dropout, which scales the masked input data by the following equation:
    `output = scale * data * mask`, `scale = 1. / (1. - ratio)`.
    """

    def __init__(self, seed=0, ratio=0.5):
        """
        Args:
            seed (int): the random seed
            ratio (float): the ratio of random dropout, with value in [0, 1).
        """
        super(Dropout, self).__init__()
        self.ratio = ratio
        self.seed = int(seed)
        # the device seed is applied lazily, on the first forward call
        self.init_seed = False

    def forward(self, x):
        """
        forward of Dropout
        Args:
            x (CTensor): input tensor.
        Returns:
            the output CTensor. When the module-level `training` flag is
            false the input is returned unchanged.
        """
        if not self.init_seed:
            x.device().SetRandSeed(self.seed)
            self.init_seed = True
        if training:
            # Inverted dropout: kept activations are divided by the keep
            # probability. The parentheses matter: `1 / 1 - ratio` would
            # evaluate to `1 - ratio`.
            self.scale = 1 / (1 - self.ratio)
            self.mask = singa.Tensor(list(x.shape()), x.device())
            # mask entries are 1 with probability (1 - ratio), else 0
            singa.Bernoulli(1 - self.ratio, self.mask)
            x = singa.MultFloat(singa.__mul__(self.mask, x), self.scale)
        return x

    def backward(self, dy):
        """
        backward of Dropout
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            the gradient tensor over input tensor.
        """
        if training:
            # same mask and scale as the forward pass
            dy = singa.MultFloat(singa.__mul__(self.mask, dy), self.scale)
        return dy
| |
| |
def dropout(x, seed=0, ratio=0.5):
    """
    Apply a Dropout operator, which scales the masked input data by the
    following equation: `output = scale * data * mask`,
    `scale = 1. / (1. - ratio)`.
    Args:
        x (Tensor): input tensor.
        seed (int): the random seed handed to the Dropout operator.
        ratio (float): the ratio of random dropout, with value in [0, 1).
    Returns:
        the output Tensor (first output of the operator).
    """
    op = Dropout(seed, ratio)
    outputs = op(x)
    return outputs[0]
| |
| |
class ReduceSum(Operator):
    """
    Init a ReduceSum, computes the sum of the input tensor's element along
    the provided axes.
    """

    def __init__(self, axes=None, keepdims=1):
        """
        Args:
            axes (list of int): A list of integers, along which to reduce.
                Accepted range is [-r, r-1] where r = rank(data). The default
                is None, which reduces over all the dimensions of the input tensor.
            keepdims (int): Keep the reduced dimension or not, default 1 mean
                keep reduced dimension.
        """
        super(ReduceSum, self).__init__()
        self.axes = axes
        self.keepdims = keepdims

    def forward(self, x):
        """
        forward of ReduceSum
        Args:
            x (CTensor): input tensor.
        Returns:
            the output CTensor.
        """
        _x = tensor.from_raw_tensor(x)
        x_shape = list(_x.shape)
        rank = len(x_shape)
        # normalise the axes: None means "all axes", negatives count from the back
        if self.axes is None:
            self.axes = list(range(rank))
        else:
            self.axes = [i if i >= 0 else rank + i for i in self.axes]
        # reduce from the highest axis down so remaining axis numbers stay valid
        self.axes.sort(reverse=True)
        for axis in self.axes:
            _x = tensor.sum(_x, axis)
            x_shape[axis] = 1
        if self.keepdims == 1:
            _x = tensor.reshape(_x, x_shape)
        # x_shape is now the input shape with every reduced axis set to 1
        self.cache = (x_shape, x)
        return _x.data

    def backward(self, dy):
        """
        backward of ReduceSum
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            the gradient tensor over input tensor.
        """
        x_shape, x = self.cache
        # restore the reduced axes (as length 1) so dy broadcasts against x
        dy = singa.Reshape(dy, x_shape)
        # d(sum)/dx is 1 for every element, so the input gradient is dy
        # broadcast to the input shape with no rescaling (a 1/N factor would
        # be the gradient of a mean, not of a sum).
        mask = singa.Tensor(list(x.shape()), x.device())
        mask.SetFloatValue(1.0)
        dy = singa.__mul__(mask, dy)
        return dy
| |
| |
def reduce_sum(x, axes=None, keepdims=1):
    """
    Apply a ReduceSum operator, which sums the elements of the input tensor
    along the provided axes.
    Args:
        x (Tensor): input tensor.
        axes (list of int): axes along which to reduce. Accepted range is
            [-r, r-1] where r = rank(data). The default, None, reduces over
            all the dimensions of the input tensor.
        keepdims (int): 1 (the default) keeps the reduced dimensions.
    Returns:
        the output Tensor (first output of the operator).
    """
    op = ReduceSum(axes, keepdims)
    outputs = op(x)
    return outputs[0]
| |
| |
class ReduceMean(Operator):
    """
    ReduceMean operator: computes the mean of the input tensor's elements
    along the provided axes.
    """

    def __init__(self, axes=None, keepdims=1):
        """
        Args:
            axes (list of int): axes along which to reduce. Accepted range is
                [-r, r-1] where r = rank(data). The default, None, reduces
                over all the dimensions of the input tensor.
            keepdims (int): 1 (the default) keeps the reduced dimensions.
        """
        super(ReduceMean, self).__init__()
        self.axes = axes
        self.keepdims = keepdims

    def forward(self, x):
        """
        forward of ReduceMean
        Args:
            x (CTensor): input tensor.
        Returns:
            the output CTensor.
        """
        reduced = tensor.from_raw_tensor(x)
        kept_shape = list(reduced.shape)
        rank = len(kept_shape)
        # None selects every axis; negative axes count from the back
        if self.axes is None:
            self.axes = list(range(rank))
        else:
            self.axes = [a if a >= 0 else rank + a for a in self.axes]
        # sum the highest axis first so the lower axis numbers stay valid
        self.axes.sort(reverse=True)
        for axis in self.axes:
            reduced = tensor.sum(reduced, axis)
            kept_shape[axis] = 1
        if self.keepdims == 1:
            reduced = tensor.reshape(reduced, kept_shape)
        self.cache = (kept_shape, x)
        # output element count / input element count: turns the sum into a mean
        self.scale = np.prod(kept_shape) / np.prod(x.shape())
        return singa.MultFloat(reduced.data, self.scale)

    def backward(self, dy):
        """
        backward of ReduceMean
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            the gradient tensor over input tensor.
        """
        kept_shape, x = self.cache
        grad = singa.Reshape(dy, kept_shape)
        # multiplying by an input-shaped tensor of ones expands grad to the
        # input shape
        ones = singa.Tensor(list(x.shape()), x.device())
        ones.SetFloatValue(1.0)
        grad = singa.__mul__(ones, grad)
        return singa.MultFloat(grad, self.scale)
| |
| |
def reduce_mean(x, axes=None, keepdims=1):
    """
    Apply a ReduceMean operator, which computes the mean of the input
    tensor's elements along the provided axes.
    Args:
        x (Tensor): input tensor.
        axes (list of int): axes along which to reduce. Accepted range is
            [-r, r-1] where r = rank(data). The default, None, reduces over
            all the dimensions of the input tensor.
        keepdims (int): 1 (the default) keeps the reduced dimensions.
    Returns:
        the output Tensor (first output of the operator).
    """
    op = ReduceMean(axes, keepdims)
    outputs = op(x)
    return outputs[0]
| |
| |
class Slice(Operator):
    """
    Init a Slice, Produces a slice of the input tensor along multiple axes.
    Similar to numpy: https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html
    """

    def __init__(self, starts, ends, axes=None, steps=None):
        """
        Args:
            starts (list of int): starting indices of corresponding axis
            ends (list of int): ending indices of corresponding axis
            axes (list of int): axes that `starts` and `ends` apply to.
                Negative value means counting dimensions from the back.
                Accepted range is [-r, r-1] where r = rank(data).
            steps (list of int): slice step of corresponding axis in `axes`.
                Negative value means slicing backward. 'steps' cannot be 0.
                Defaults to 1.
        """
        super(Slice, self).__init__()
        self.starts = starts
        self.ends = ends
        self.axes = axes
        self.steps = steps

    def forward(self, x):
        """
        forward of Slice
        Args:
            x (CTensor): input tensor.
        Returns:
            the output CTensor.
        """
        x_shape = list(x.shape())
        rank = len(x_shape)
        # normalise the axes: None means "all axes", negatives count from the back
        if self.axes is None:
            self.axes = list(range(rank))
        else:
            self.axes = [i if i >= 0 else rank + i for i in self.axes]
        # one (axis, length, start, end, step) record per sliced axis, for backward
        self.cache = []
        if self.steps is None:
            self.steps = [1] * rank
        for idx, axis in enumerate(self.axes):
            axis = int(axis)
            start, end, step = self.starts[idx], self.ends[idx], self.steps[idx]
            if end > x_shape[axis]:
                end = x_shape[axis]
            self.cache.append((axis, x_shape[axis], start, end, step))
            # Python range slicing resolves start/end/step into the selected
            # source indices, in output order
            xs = [
                singa.SliceOn(x, src_idx, src_idx + 1, axis)
                for src_idx in range(x_shape[axis])[start:end:step]
            ]
            assert len(xs) > 0, "Cannot support empty tensor"
            x = singa.VecTensor(xs)
            x = singa.ConcatOn(x, axis)
        return x

    def backward(self, dy):
        """
        backward of Slice
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            the gradient tensor over input tensor.
        """
        # undo the per-axis slicing in reverse order of the forward pass
        for axis, shape, start, end, step in self.cache[::-1]:
            # Source index along `axis` -> position of its slice inside dy.
            # Forward emits slices in range(shape)[start:end:step] order, which
            # is descending for a negative step, so the gradient must be routed
            # through this mapping rather than assumed to be ascending.
            dy_pos = {
                src_idx: pos
                for pos, src_idx in enumerate(range(shape)[start:end:step])
            }
            zero_shape = list(dy.shape())
            zero_shape[axis] = 1
            dys = []
            for src_idx in range(shape):
                if src_idx in dy_pos:
                    pos = dy_pos[src_idx]
                    tmp_tensor = singa.SliceOn(dy, pos, pos + 1, axis)
                else:
                    # positions that were not selected receive zero gradient
                    tmp_tensor = singa.Tensor(zero_shape, dy.device())
                    tmp_tensor.SetFloatValue(0.)
                dys.append(tmp_tensor)
            dys = singa.VecTensor(dys)
            dy = singa.ConcatOn(dys, axis)
        return dy
| |
| |
def slice(x, starts, ends, axes=None, steps=None):
    """
    Apply a Slice operator, which produces a slice of the input tensor along
    multiple axes.
    Similar to numpy: https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html
    Args:
        x (Tensor): input tensor.
        starts (list of int): starting indices of corresponding axis
        ends (list of int): ending indices of corresponding axis
        axes (list of int): axes that `starts` and `ends` apply to.
            Negative value means counting dimensions from the back.
            Accepted range is [-r, r-1] where r = rank(data).
        steps (list of int): slice step of corresponding axis in `axes`.
            Negative value means slicing backward. 'steps' cannot be 0.
            Defaults to 1.
    Returns:
        the output Tensor (first output of the operator).
    """
    op = Slice(starts, ends, axes, steps)
    outputs = op(x)
    return outputs[0]
| |
| |
class Ceil(Operator):
    """
    Elementwise ceiling operator: takes one input tensor and produces one
    output tensor with `y = ceil(x)`.
    """

    def __init__(self):
        super(Ceil, self).__init__()

    def forward(self, x):
        """
        forward of Ceil
        Args:
            x (CTensor): input tensor.
        Returns:
            the output CTensor.
        """
        return singa.Ceil(x)

    def backward(self, dy):
        """
        backward of Ceil. The returned gradient is all zeros.
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            the gradient tensor over input tensor.
        """
        zero_grad = singa.Tensor(dy.shape(), dy.device())
        zero_grad.SetFloatValue(0.)
        return zero_grad
| |
| |
def ceil(x):
    """
    Apply a Ceil operator: `y = ceil(x)` is computed elementwise on the
    input tensor.
    Args:
        x (Tensor): input tensor.
    Returns:
        the output Tensor (first output of the operator).
    """
    op = Ceil()
    outputs = op(x)
    return outputs[0]
| |
| |
class Floor(Operator):
    """
    Elementwise floor operator: takes one input tensor and produces one
    output tensor with `y = floor(x)`.
    """

    def __init__(self):
        super(Floor, self).__init__()

    def forward(self, x):
        """
        forward of floor
        Args:
            x (CTensor): input tensor
        Returns:
            the output CTensor
        """
        return singa.Floor(x)

    def backward(self, dy):
        """
        backward of floor. Derivative of floor is 0, so the returned gradient
        is all zeros.
        Args:
            dy (CTensor): gradient tensor
        Returns:
            the gradient tensor over the input tensor.
        """
        zero_grad = singa.Tensor(dy.shape(), dy.device())
        zero_grad.SetFloatValue(0.)
        return zero_grad
| |
| |
def floor(x):
    """
    Apply a Floor operator: `y = floor(x)` is computed elementwise on the
    input tensor.
    Args:
        x (Tensor): input tensor.
    Returns:
        the output Tensor (first output of the operator).
    """
    op = Floor()
    outputs = op(x)
    return outputs[0]
| |
| |
class Split(Operator):
    """
    Split operator: splits a tensor into a tuple of tensors along the
    specified 'axis'.
    """

    def __init__(self, axis, parts, num_output=None):
        """
        Args:
            axis (int): which axis to split on. A negative value means
                counting dimensions from the back. Accepted range is
                [-rank, rank-1] where r = rank(input).
            parts (list of int): length of each output along `axis`. When it
                is None, the tensor is split into `num_output` equal parts.
            num_output (int): number of equal-sized outputs; required when
                `parts` is None.
        """
        super(Split, self).__init__()
        self.axis = axis
        self.parts = parts
        self.num_output = num_output
        if self.parts is None:
            assert self.num_output is not None, "For (parts, num_output), it at least requires one."

    def forward(self, x):
        """
        forward of Split
        Args:
            x (CTensor): input tensor.
        Returns:
            a tuple of output CTensors, one per entry of `parts`.
        """
        dims = list(x.shape())
        # wrap a negative axis into [0, rank)
        self.axis = self.axis % len(dims)
        if self.parts is None:
            # equal-sized parts, using floor division of the axis length
            self.parts = [dims[self.axis] // self.num_output
                         ] * self.num_output
        pieces = []
        begin = 0
        for length in self.parts:
            pieces.append(singa.SliceOn(x, begin, begin + length, self.axis))
            begin += length
        return tuple(pieces)

    def backward(self, *dys):
        """
        backward of Split
        Args:
            dys: list of CTensor, gradient tensor.
        Returns:
            the gradient tensor over input tensor.
        """
        # concatenating the per-output gradients along the split axis
        grads = singa.VecTensor(dys)
        return singa.ConcatOn(grads, self.axis)
| |
| |
def split(x, axis, parts, num_output=None):
    """
    Apply a Split operator, which splits a tensor into several tensors along
    the specified 'axis'.
    Args:
        x (Tensor): input tensor.
        axis (int): which axis to split on. A negative value means
            counting dimensions from the back. Accepted range is
            [-rank, rank-1] where r = rank(input).
        parts (list of int): length of each output along `axis`. When it is
            None, the tensor is split into `num_output` equal parts.
        num_output (int): number of equal-sized outputs; required when
            `parts` is None.
    Returns:
        all outputs of the operator (the result is not indexed).
    """
    op = Split(axis, parts, num_output)
    return op(x)
| |
| |
class Gather(Operator):
    """
    Init a Gather, Given data tensor of rank r >= 1, and indices tensor of
    rank q, gather entries of the axis dimension of data (by default outer-most
    one as axis=0) indexed by indices, and concatenates them in an output tensor of rank `q + (r - 1)`.
    """

    def __init__(self, axis, indices):
        """
        Args:
            axis (int): which axis to slice on. A negative value means counting
                dimensions from the back. Accepted range is [-rank, rank-1]
                where r = rank(input).
            indices (list of int): entries of the axis dimension of data.
                Each entry may itself be a tuple/list/ndarray of ints.
        """
        super(Gather, self).__init__()
        self.axis = axis
        self.indices = indices

    def forward(self, x):
        """
        forward of Gather
        Args:
            x (CTensor): input tensor.
        Returns:
            the output CTensor.
        """
        self.x_shape = list(x.shape())
        self.axis = self.axis % len(self.x_shape)  # handle the negative value
        _shape = self.x_shape[self.axis]
        xs = []
        for indice in self.indices:
            # each indice is a sub-indice
            if isinstance(indice, (tuple, list, np.ndarray)):
                sub_xs = []
                for idx in indice:
                    idx = int(idx % _shape)  # wrap negative indices
                    tmp_tensor = singa.SliceOn(x, idx, idx + 1, self.axis)
                    sub_xs.append(tmp_tensor)
                sub_xs = singa.VecTensor(sub_xs)
                tmp_tensor = singa.ConcatOn(sub_xs, self.axis)
                _slice_shape = list(tmp_tensor.shape())
                _slice_shape.insert(self.axis, 1)  # add a new axis to concat
                tmp_tensor = singa.Reshape(tmp_tensor, _slice_shape)
            else:
                indice = int(indice % _shape)  # wrap negative indices
                tmp_tensor = singa.SliceOn(x, indice, indice + 1, self.axis)
            xs.append(tmp_tensor)
        xs = singa.VecTensor(xs)
        return singa.ConcatOn(xs, self.axis)

    def backward(self, dy):
        """
        backward of Gather
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            the gradient tensor over input tensor.
        """
        _shape = self.x_shape[self.axis]

        def construct_dx(dy, axis, indices, _shape):
            # Scatter the slices of dy back to the source positions they were
            # gathered from. Source position -> list of positions in dy; the
            # indices are wrapped exactly like in forward so negative,
            # unsorted and repeated indices are all routed correctly.
            dy_positions = {}
            for dy_idx, src_idx in enumerate(indices):
                dy_positions.setdefault(int(src_idx % _shape),
                                        []).append(dy_idx)
            zero_shape = list(dy.shape())
            zero_shape[axis] = 1
            dys = []
            for step_idx in range(_shape):
                if step_idx in dy_positions:
                    # a source entry gathered several times accumulates the
                    # gradient of every copy
                    tmp_tensor = None
                    for dy_idx in dy_positions[step_idx]:
                        piece = singa.SliceOn(dy, dy_idx, dy_idx + 1, axis)
                        if tmp_tensor is None:
                            tmp_tensor = piece
                        else:
                            tmp_tensor = singa.__add__(tmp_tensor, piece)
                else:
                    # entries that were never gathered get zero gradient
                    tmp_tensor = singa.Tensor(zero_shape, dy.device())
                    tmp_tensor.SetFloatValue(0.)
                dys.append(tmp_tensor)
            dys = singa.VecTensor(dys)
            return singa.ConcatOn(dys, axis)

        # same nesting test as forward (which also accepts ndarray entries)
        if isinstance(self.indices[0], (tuple, list, np.ndarray)):
            dx = singa.Tensor(self.x_shape, dy.device())
            dx.SetFloatValue(0.)
            for data_idx in range(len(self.indices)):
                # get a piece of the dy and remove its new axis added at forward
                tmp_tensor = singa.SliceOn(dy, data_idx, data_idx + 1,
                                           self.axis)
                _slice_shape = list(tmp_tensor.shape())
                del _slice_shape[self.axis]
                tmp_tensor = singa.Reshape(tmp_tensor, _slice_shape)
                # construct dx and sum them together
                tmp_tensor = construct_dx(tmp_tensor, self.axis,
                                          self.indices[data_idx],
                                          self.x_shape[self.axis])
                dx = singa.__add__(dx, tmp_tensor)
            return dx
        else:
            return construct_dx(dy, self.axis, self.indices, _shape)
| |
| |
def gather(x, axis, indices):
    """
    Apply a Gather operator. Given data tensor of rank r >= 1, and indices
    tensor of rank q, gather entries of the axis dimension of data indexed by
    indices, and concatenate them in an output tensor of rank `q + (r - 1)`.
    Args:
        x (Tensor): input tensor.
        axis (int): which axis to slice on. A negative value means counting
            dimensions from the back. Accepted range is [-rank, rank-1]
            where r = rank(input).
        indices (list of int): entries of the axis dimension of data.
    Returns:
        the output Tensor (first output of the operator).
    """
    op = Gather(axis, indices)
    outputs = op(x)
    return outputs[0]
| |
| |
class Tile(Operator):
    """
    Tile operator: constructs a tensor by repeating a given tensor along its
    axes. Modelled on numpy's tile:
    https://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html
    """

    def __init__(self, repeats):
        """
        Args:
            repeats (list of int): number of repeated copies along each of
                the input's dimensions; entry k applies to axis k. A single
                int is wrapped into a one-element list.
        """
        super(Tile, self).__init__()
        self.repeats = [repeats] if isinstance(repeats, int) else repeats

    def forward(self, x):
        """
        forward of Tile
        Args:
            x (CTensor): input tensor.
        Returns:
            the output CTensor.
        """
        self.x_shape = list(x.shape())
        # when repeats is longer than the input rank, pad the shape with
        # leading axes of length 1
        missing_dims = len(self.repeats) - len(self.x_shape)
        if missing_dims > 0:
            x = singa.Reshape(x, [1] * missing_dims + self.x_shape)
        for axis, rp in enumerate(self.repeats):
            if rp != 1:
                # concatenate rp copies of the current tensor along this axis
                copies = singa.VecTensor([x.Clone() for _ in range(rp)])
                x = singa.ConcatOn(copies, axis)
        return x

    def backward(self, dy):
        """
        backward of Tile
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            the gradient tensor over input tensor.
        """
        for axis, rp in enumerate(self.repeats):
            if rp == 1:
                continue
            chunk_shape = list(dy.shape())
            chunk_len = chunk_shape[axis] // rp
            chunk_shape[axis] = chunk_len
            # sum the rp repeated chunks along this axis into one chunk
            folded = singa.Tensor(chunk_shape, dy.device())
            folded.SetFloatValue(0.)
            for k in range(rp):
                chunk = singa.SliceOn(dy, chunk_len * k, chunk_len * (k + 1),
                                      axis)
                folded = singa.__add__(folded, chunk)
            dy = folded
        # drop the leading axes that forward added
        if len(self.x_shape) < len(self.repeats):
            dy = singa.Reshape(dy, self.x_shape)
        return dy
| |
| |
def tile(x, repeats):
    """
    Apply a Tile operator, which constructs a tensor by repeating a given
    tensor. Modelled on numpy's tile:
    https://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html
    Args:
        x (Tensor): input tensor.
        repeats (list of int): number of repeated copies along each of the
            input's dimensions.
    Returns:
        the output Tensor (first output of the operator).
    """
    op = Tile(repeats)
    outputs = op(x)
    return outputs[0]
| |
| |
class NonZero(Operator):
    """
    NonZero operator: returns the indices of the non-zero elements of the
    input, computed with numpy.nonzero:
    https://docs.scipy.org/doc/numpy/reference/generated/numpy.nonzero.html
    """

    def __init__(self):
        super(NonZero, self).__init__()

    def forward(self, x):
        """
        forward of NonZero
        Args:
            x (CTensor): input tensor.
        Returns:
            the output CTensor holding the np.nonzero result as an int32
            array, placed on the device of `x`.
        """
        # the indices are computed on the host with numpy
        values = tensor.to_numpy(tensor.from_raw_tensor(x))
        coords = np.array((np.nonzero(values))).astype(np.int32)
        result = tensor.from_numpy(coords)
        result.to_device(x.device())
        return result.data

    def backward(self, dy):
        """
        backward of NonZero
        Args:
            dy (CTensor): gradient tensor.
        Raises:
            AssertionError: always, this operator has no gradient
        """
        assert False, ('no gradient for backward function')
| |
| |
def nonzero(x):
    """
    Apply a NonZero operator, which returns the indices of the non-zero
    elements of the input tensor.
    Args:
        x (Tensor): input tensor.
    Returns:
        the output Tensor (first output of the operator).
    """
    op = NonZero()
    outputs = op(x)
    return outputs[0]
| |
| |
class Cast(Operator):
    """
    Cast operator: converts the elements of the input tensor to the data
    type given by the 'to' argument and returns a tensor of the same size in
    the converted type.
    """

    def __init__(self, to):
        """
        Args:
            to (int): data type, float32 = 0; int = 2.
        """
        super(Cast, self).__init__()
        self.to = to

    def forward(self, x):
        """
        forward of Cast
        Args:
            x (CTensor): input tensor.
        Returns:
            the output CTensor; the input itself when it already has the
            requested data type.
        """
        if x.data_type() == self.to:
            return x
        return x.AsType(self.to)

    def backward(self, dy):
        """
        backward of Cast
        Args:
            dy (CTensor), gradient tensor.
        Raises:
            AssertionError: always, this operator has no gradient
        """
        assert False, ('no gradient for backward function')
| |
| |
def cast(x, to):
    """
    Apply a Cast operator, which converts the elements of the input tensor
    to the data type given by the 'to' argument and returns a tensor of the
    same size in the converted type.
    Args:
        x (Tensor): input tensor.
        to (int): data type, float32 = 0; int = 2.
    Returns:
        the output Tensor (first output of the operator).
    """
    op = Cast(to)
    outputs = op(x)
    return outputs[0]
| |
| |
class OneHot(Operator):
    """
    Produces a one-hot tensor based on inputs.
    """

    def __init__(self, axis, depth, values):
        """
        Args:
            axis (int): axis along which the one-hot dimension is inserted.
                axis=-1 means that the additional dimension is inserted as
                the innermost/last dimension of the output tensor.
            depth (int): number of classes in the one-hot tensor, which is
                also the size of the inserted one-hot dimension. The values
                in the 'indices' input are expected to be in the range
                [-depth, depth-1].
            values (float): two elements in the format
                [off_value, on_value], where 'on_value' fills the locations
                specified by the 'indices' input and 'off_value' fills the
                others.
        """
        super(OneHot, self).__init__()
        self.axis = axis
        self.depth = depth
        self.values = values

    def forward(self, indices):
        """
        forward of OneHot, we borrow this function from onnx
        Args:
            indices (CTensor): tensor of class indices. Its values are
                expected to be in the range [-depth, depth-1].
        Returns:
            the output CTensor, float32, on the device of `indices`.
        """
        idx = tensor.to_numpy(tensor.from_raw_tensor(indices))
        rank = len(idx.shape)
        # a negative axis counts from the back of the *output* (rank + 1 dims)
        if self.axis < 0:
            self.axis += (rank + 1)
        classes = np.arange(self.depth)
        left_dims = idx.shape[0:self.axis]
        right_dims = idx.shape[self.axis:rank]
        # class ids laid out along the one-hot axis, broadcastable to the output
        targets = np.reshape(
            classes,
            (1,) * len(left_dims) + classes.shape + (1,) * len(right_dims))
        # wrap negative indices, then insert a length-1 axis at the one-hot position
        idx = np.reshape(np.mod(idx, self.depth),
                         left_dims + (1,) + right_dims)
        one_hot = np.asarray(targets == idx, dtype=np.float32)
        # map {0, 1} onto {off_value, on_value}
        one_hot = one_hot * (self.values[1] -
                             self.values[0]) + self.values[0]
        result = tensor.from_numpy(one_hot)
        result.to_device(indices.device())
        return result.data

    def backward(self, dy):
        """
        backward of OneHot
        Args:
            dy (CTensor):gradient tensor.
        Raises:
            AssertionError: always, this operator has no gradient
        """
        assert False, ('no gradient for backward function')
| |
| |
def onehot(axis, indices, depth, values):
    """
    Apply a OneHot operator, which produces a one-hot tensor based on inputs.
    Args:
        axis (int): axis along which the one-hot dimension is inserted.
            axis=-1 means that the additional dimension is inserted as the
            innermost/last dimension of the output tensor.
        indices (Tensor): tensor of class indices. Its values are expected
            to be in the range [-depth, depth-1].
        depth (int): number of classes in the one-hot tensor, which is also
            the size of the inserted one-hot dimension.
        values (float): two elements in the format [off_value, on_value],
            where 'on_value' fills the locations specified by 'indices' and
            'off_value' fills the others.
    Returns:
        the output Tensor (first output of the operator).
    """
    op = OneHot(axis, depth, values)
    outputs = op(indices)
    return outputs[0]
| |
| |
class _RNN(Operator):
    """ RNN operation with c++ backend

    Thin autograd wrapper around the `singa.GpuRNN*` functions. Requires a
    CUDA build (asserted in `__init__`).
    """

    def __init__(
            self,
            handle,
            return_sequences=False,
            #  batch_first=True,
            use_mask=False,
            seq_lengths=None):
        """
        Args:
            handle: backend handle passed unchanged to every `singa.GpuRNN*`
                call.
            return_sequences (bool): if True, forward returns the whole
                output `y`; otherwise only its last time step.
            use_mask (bool): if True, the `...Ex` backend variants are used
                and are given `seq_lengths.data`.
            seq_lengths (Tensor): must be a `Tensor` when `use_mask` is True.
        Raises:
            AssertionError: if singa was built without CUDA, or if `use_mask`
                is set and `seq_lengths` is not a `Tensor`.
        """
        assert singa.USE_CUDA, "Not able to run without CUDA"
        super(_RNN, self).__init__()
        self.handle = handle
        self.return_sequences = return_sequences
        self.use_mask = use_mask
        if use_mask:
            assert type(seq_lengths) == Tensor, "wrong type for seq_lengths"
        self.seq_lengths = seq_lengths

    def forward(self, x, hx, cx, w):
        """
        forward of _RNN
        Args:
            x (CTensor): input passed to the backend.
            hx (CTensor): passed to the backend as `hx` (presumably the
                initial hidden state - confirm against the backend API).
            cx (CTensor): passed to the backend as `cx` (presumably the
                initial cell state - confirm against the backend API).
            w (CTensor): passed to the backend as `w` (presumably the packed
                weights - confirm against the backend API).
        Returns:
            CTensor: `y` when `return_sequences` is True, otherwise a new
            tensor of shape (y.shape()[1], y.shape()[2]) copied from the tail
            of `y`.
        """
        if training:
            if self.use_mask:
                (y, hy,
                 cy) = singa.GpuRNNForwardTrainingEx(x, hx, cx, w,
                                                     self.seq_lengths.data,
                                                     self.handle)
            else:
                (y, hy,
                 cy) = singa.GpuRNNForwardTraining(x, hx, cx, w, self.handle)
            # keep inputs and outputs of the training pass; backward reads them
            self.inputs = {
                'x': x,
                'hx': hx,
                'cx': cx,
                'w': w,
                'y': y,
                'hy': hy,
                'cy': cy
            }
        else:
            if self.use_mask:
                (y, hy,
                 cy) = singa.GpuRNNForwardInferenceEx(x, hx, cx, w,
                                                      self.seq_lengths.data,
                                                      self.handle)
            else:
                (y, hy,
                 cy) = singa.GpuRNNForwardInference(x, hx, cx, w, self.handle)

        if self.return_sequences:
            # (seq, bs, data)
            return y
        else:
            # return last time step of y
            # (seq, bs, data)[-1] -> (bs, data)
            last_y_shape = (y.shape()[1], y.shape()[2])
            last_y = singa.Tensor(list(last_y_shape), x.device())

            # the last time step occupies the final last_y.Size() elements of y
            src_offset = y.Size() - last_y.Size()
            # def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0):
            singa.CopyDataToFrom(last_y, y, last_y.Size(), 0, src_offset)
            return last_y

    def backward(self, grad):
        """
        backward of _RNN
        Args:
            grad (CTensor): gradient over the value returned by forward; it
                must have the shape of `y` when `return_sequences` is True,
                otherwise the shape of the last time step of `y`.
        Returns:
            tuple (dx, dhx, dcx, dW) as produced by the backend backward
            functions.
        Raises:
            AssertionError: if `training` is not True, if no training-mode
                forward pass has stored `self.inputs`, or if `grad` has an
                unexpected shape.
        """
        assert training is True and hasattr(
            self, "inputs"), "Please set training as True before do BP. "

        # (seq, bs, hid)
        dy = None
        if self.return_sequences:
            assert grad.shape() == self.inputs['y'].shape(), (
                "grad shape %s != y shape %s" %
                (grad.shape(), self.inputs['y'].shape()))
            dy = grad
        else:
            # grad (bs, directions*hidden) -> dy (seq, bs, directions*hidden)
            # empty space filled by zeros
            assert grad.shape() == (self.inputs['y'].shape()[1],
                                    self.inputs['y'].shape()[2]), (
                                        "grad y shape %s != last y shape %s" %
                                        (grad.shape(),
                                         (self.inputs['y'].shape()[1],
                                          self.inputs['y'].shape()[2])))
            dy = singa.Tensor(list(self.inputs['y'].shape()), grad.device())
            dy.SetFloatValue(0.0)
            # mirror of forward: write grad into the final grad.Size() elements
            dst_offset = dy.Size() - grad.Size()
            singa.CopyDataToFrom(dy, grad, grad.Size(), dst_offset, 0)

        # states grad are zeros, since states are not used in forward pass
        dhy = singa.Tensor(list(self.inputs['hy'].shape()), grad.device())
        dhy.SetFloatValue(0.0)
        dcy = singa.Tensor(list(self.inputs['cy'].shape()), grad.device())
        dcy.SetFloatValue(0.0)

        if self.use_mask:
            (dx, dhx,
             dcx) = singa.GpuRNNBackwardxEx(self.inputs['y'], dy, dhy, dcy,
                                            self.inputs['w'], self.inputs['hx'],
                                            self.inputs['cx'],
                                            self.seq_lengths.data, self.handle)
            dW = singa.GpuRNNBackwardWEx(self.inputs['x'], self.inputs['hx'],
                                         self.inputs['y'],
                                         self.seq_lengths.data, self.handle)
        else:
            (dx, dhx,
             dcx) = singa.GpuRNNBackwardx(self.inputs['y'], dy, dhy, dcy,
                                          self.inputs['w'], self.inputs['hx'],
                                          self.inputs['cx'], self.handle)
            dW = singa.GpuRNNBackwardW(self.inputs['x'], self.inputs['hx'],
                                       self.inputs['y'], self.handle)

        return dx, dhx, dcx, dW
| |
| |
class CosSim(Operator):
    """
    Cosine similarity between the matching rows of two 2d tensors.
    """

    def __init__(self):
        super(CosSim, self).__init__()

    @classmethod
    def dot(cls, a, b):
        """
        Row-wise dot product.
        Args:
            a (CTensor): 2d input tensor.
            b (CTensor): 2d input tensor.
        Returns:
            CTensor: 1d tensor with one dot product per row.
        """
        products = []
        for row in range(a.shape()[0]):
            lhs = singa.SliceOn(a, row, row + 1, 0)  # 1 * d
            rhs = singa.SliceOn(b, row, row + 1, 0)  # 1 * d
            rhs = singa.DefaultTranspose(rhs)  # d * 1
            products.append(singa.Mult(lhs, rhs))  # 1 * 1
        stacked = singa.ConcatOn(singa.VecTensor(products), 0)  # b * 1
        return singa.Reshape(stacked, [stacked.shape()[0]])  # b

    def forward(self, a, b):
        """
        Compute ``dot(a, b) / (|a| * |b|)`` for every row.
        Args:
            a (CTensor): input tensor.
            b (CTensor): input tensor.
        Returns:
            CTensor: the per-row cosine similarity.
        """
        a_sq = CosSim.dot(a, a)
        b_sq = CosSim.dot(b, b)
        a_norm = singa.PowFloat(a_sq, 0.5)
        b_norm = singa.PowFloat(b_sq, 0.5)
        cos = singa.__div__(CosSim.dot(a, b), singa.__mul__(a_norm, b_norm))
        if training:
            # intermediate results are reused by backward()
            self.cache = (a, b, a_sq, b_sq, a_norm, b_norm, cos)
        return cos

    def backward(self, dy):
        """
        Gradient of the cosine similarity,
        following https://math.stackexchange.com/a/1923705
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            tuple of CTensor: the gradients over ``a`` and ``b``.
        """

        def as_column(t):
            # append a trailing axis of size 1 so per-row values broadcast
            return singa.Reshape(t, list(t.shape()) + [1])

        a, b, a_sq, b_sq, a_norm, b_norm, cos = self.cache
        norm_prod = as_column(singa.__mul__(a_norm, b_norm))  # b * 1
        a_sq = as_column(a_sq)  # b * 1
        b_sq = as_column(b_sq)  # b * 1
        cos = as_column(cos)  # b * 1
        dy = as_column(dy)  # broadcast over the feature axis
        da = singa.__sub__(singa.__div__(b, norm_prod),
                           singa.__div__(singa.__mul__(cos, a), a_sq))
        db = singa.__sub__(singa.__div__(a, norm_prod),
                           singa.__div__(singa.__mul__(cos, b), b_sq))
        return singa.__mul__(dy, da), singa.__mul__(dy, db)
| |
| |
def cossim(a, b):
    """
    Apply the cosine similarity operator to two 2d tensors.
    Args:
        a (Tensor): input tensor.
        b (Tensor): input tensor, same shape as ``a``.
    Returns:
        the output Tensor.
    """
    assert a.shape == b.shape, "shape not match for cossim"
    for operand in (a, b):
        assert operand.ndim() == 2, "shape should be in 2d for cossim"
    outputs = CosSim()(a, b)
    return outputs[0]
| |
| |
class Expand(Operator):
    """
    Expand operator following ONNX Operator Schemas
    https://github.com/onnx/onnx/blob/master/docs/Operators.md#Expand

    Example usage::
        data = [[1.], [2.], [3.]]

        # dim_changed
        shape = [2, 1, 6]
        output = [[[1., 1., 1., 1., 1., 1.],
                   [2., 2., 2., 2., 2., 2.],
                   [3., 3., 3., 3., 3., 3.]],
                  [[1., 1., 1., 1., 1., 1.],
                   [2., 2., 2., 2., 2., 2.],
                   [3., 3., 3., 3., 3., 3.]]]

        # dim_unchanged
        shape = [3, 4]
        output = [[1., 1., 1., 1.],
                  [2., 2., 2., 2.],
                  [3., 3., 3., 3.]]
    """

    def __init__(self, shape):
        """
        Args:
            shape (list[int]): indicates the shape you want to expand to,
                following the broadcast rule
        """
        super(Expand, self).__init__()
        self.shape = shape

    def forward(self, x):
        """
        Broadcast ``x`` to ``self.shape``.
        Args:
            x (CTensor): input tensor.
        Returns:
            CTensor: the expanded tensor.
        """
        if isinstance(self.shape, np.ndarray):
            self.shape = self.shape.tolist()
        else:
            self.shape = list(self.shape)
        self.x_shape = list(x.shape())
        self.dim_changed = len(self.shape) != len(self.x_shape)
        if self.dim_changed:
            # ranks differ: let the element-wise multiply broadcast x
            # against a tensor of ones with the target shape
            ones = singa.Tensor(self.shape, x.device())
            ones.SetFloatValue(1.)
            return singa.__mul__(x, ones)
        for axis, (target, current) in enumerate(zip(self.shape,
                                                     self.x_shape)):
            # a target dim of 1 keeps the input dim under the broadcast
            # rule, so there is nothing to tile on that axis
            if target == current or target == 1:
                continue
            copies = [x] * (target // current)
            x = singa.ConcatOn(singa.VecTensor(copies), axis)
        return x

    def backward(self, dy):
        """
        Reduce ``dy`` back to the shape of the forward input by summing
        over the broadcast axes.
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            CTensor: the gradient over the input, shaped like the input.
        """
        x_shape = self.x_shape
        if self.dim_changed:
            # Align the input shape to the rank of dy by left-padding with
            # ones, then sum over every axis where the input had size 1.
            # Axes are visited from last to first because tensor.sum drops
            # the reduced axis, which would shift the later axis indices.
            rank = max(len(self.shape), len(x_shape))
            aligned = [1] * (rank - len(x_shape)) + list(x_shape)
            dy = tensor.from_raw_tensor(dy)
            for axis in reversed(range(rank)):
                if aligned[axis] == 1:
                    dy = tensor.sum(dy, axis)
            dy = dy.data
        else:
            for axis in reversed(range(len(self.shape))):
                target, current = self.shape[axis], x_shape[axis]
                if target <= current:
                    continue
                copies = target // current
                pieces = []
                for i in range(current):
                    # add up the gradient of every tiled copy of slice i
                    total = None
                    for j in range(copies):
                        begin = j * current + i
                        part = singa.SliceOn(dy, begin, begin + 1, axis)
                        if total is None:
                            total = part
                        else:
                            total += part
                    pieces.append(total)
                dy = singa.ConcatOn(singa.VecTensor(pieces), axis)
        return singa.Reshape(dy, self.x_shape)
| |
| |
def expand(x, shape):
    """
    Apply the Expand operator to ``x``.
    Args:
        x (Tensor): input tensor.
        shape (list[int]): indicates the shape you want to expand to,
            following the broadcast rule
    Returns:
        the output Tensor.
    """
    operator = Expand(shape)
    outputs = operator(x)
    return outputs[0]
| |
| |
class Pad(Operator):
    """
    Pad operator following ONNX Operator Schemas
    https://github.com/onnx/onnx/blob/master/docs/Operators.md#Pad

    Example usage::
        data =
        [
            [1.0, 1.2],
            [2.3, 3.4],
            [4.5, 5.7],
        ]
        pads = [0, 2, 0, 0]

        # constant mode
        mode = 'constant'
        constant_value = 0.0
        output =
        [
            [0.0, 0.0, 1.0, 1.2],
            [0.0, 0.0, 2.3, 3.4],
            [0.0, 0.0, 4.5, 5.7],
        ]

        # reflect mode
        mode = 'reflect'
        output =
        [
            [1.0, 1.2, 1.0, 1.2],
            [2.3, 3.4, 2.3, 3.4],
            [4.5, 5.7, 4.5, 5.7],
        ]

        # edge mode
        mode = 'edge'
        output =
        [
            [1.0, 1.0, 1.0, 1.2],
            [2.3, 2.3, 2.3, 3.4],
            [4.5, 4.5, 4.5, 5.7],
        ]
    """

    def __init__(self, mode, pads, constant=0.):
        """
        Args:
            mode (string): Supported modes: `constant`(default), `reflect`, `edge`.
            pads (list[int]): number of padding elements per axis, laid out
                as all begin values followed by all end values, i.e.
                `[x1_begin, x2_begin, ..., x1_end, x2_end, ...]`.
            constant (float): A scalar value to be used if the mode chosen is
                `constant`
        """
        super(Pad, self).__init__()
        self.mode = mode
        assert self.mode in ("constant", "reflect", "edge"), (
            'Only support three modes: constant, reflect, edge')
        self.constant = constant
        self.pads = pads
        # filled lazily by forward() with one (begin, end) pair per axis
        self.pad_width = ()

    def _source_index(self, position, extent):
        """
        Map a position outside ``[0, extent)`` on an axis to the in-range
        index whose value fills it (`edge` and `reflect` modes only).
        """
        if self.mode == "edge":
            return min(max(position, 0), extent - 1)
        # reflect: mirror about the border elements without repeating them;
        # the pattern has period 2 * (extent - 1)
        if extent == 1:
            return 0
        period = 2 * (extent - 1)
        offset = position % period
        return offset if offset < extent else period - offset

    def _pad_pieces(self, x, axis, positions):
        """
        Build the tensors that fill ``positions`` (indices relative to the
        unpadded ``x``) along ``axis``, in order.
        """
        count = len(positions)
        if count == 0:
            return []
        if self.mode == "constant":
            shape = list(x.shape())
            shape[axis] = count
            block = singa.Tensor(shape, x.device())
            block.SetFloatValue(self.constant)
            return [block]
        extent = x.shape()[axis]
        pieces = []
        for position in positions:
            src = self._source_index(position, extent)
            pieces.append(singa.SliceOn(x, src, src + 1, axis))
        return pieces

    def forward(self, x):
        """
        Pad ``x`` axis by axis.
        Args:
            x (CTensor): input tensor.
        Returns:
            CTensor: the padded tensor.
        """
        if not self.pad_width:
            half_width = len(self.pads) // 2
            self.pad_width = tuple((self.pads[i], self.pads[i + half_width])
                                   for i in range(half_width))

        for axis, (begin, end) in zip(range(len(x.shape())), self.pad_width):
            begin, end = int(begin), int(end)
            assert begin >= 0 and end >= 0, "negative pads are not supported"
            if begin == 0 and end == 0:
                continue
            # both sides are derived from the tensor as it was before this
            # axis was padded, so the end padding never reads begin padding
            extent = x.shape()[axis]
            pieces = self._pad_pieces(x, axis, range(-begin, 0))
            pieces.append(x)
            pieces.extend(
                self._pad_pieces(x, axis, range(extent, extent + end)))
            x = singa.ConcatOn(singa.VecTensor(pieces), axis)
        return x

    def backward(self, dy):
        """
        Strip the padded region from ``dy``.
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            CTensor: the gradient over the input.
        """
        # NOTE(review): for `reflect` and `edge` the padded cells are copies
        # of input elements, so their gradient should be accumulated onto
        # those elements; here it is discarded, which is exact only for
        # `constant` mode.
        for axis, pads in zip(range(len(dy.shape())), self.pad_width):
            for pad, is_left in zip(pads, (True, False)):
                if pad == 0:
                    continue
                axis_shape = list(dy.shape())[axis]
                if is_left:
                    dy = singa.SliceOn(dy, pad, axis_shape, axis)
                else:
                    dy = singa.SliceOn(dy, 0, axis_shape - pad, axis)
        return dy
| |
| |
def pad(x, mode, pads, constant=0.):
    """
    Apply the Pad operator to ``x``.
    Args:
        x (Tensor): input tensor.
        mode (string): Supported modes: `constant`(default), `reflect`, `edge`.
        pads (list[int]): list of integers indicating the number of padding
            elements to add to each axis.
        constant (float): A scalar value to be used if the mode chosen is
            `constant`
    Returns:
        the output Tensor.
    """
    operator = Pad(mode, pads, constant)
    outputs = operator(x)
    return outputs[0]
| |
| |
class UpSample(Operator):
    """
    UpSample operator following ONNX Operator Schemas
    https://github.com/onnx/onnx/blob/master/docs/Operators.md#upsample

    Example usage::
        data = [[[[1, 2],
                  [3, 4],]]]

        # nearest
        scales = [1.0, 1.0, 2.0, 3.0]
        output = [[[[1, 1, 1, 2, 2, 2],
                    [1, 1, 1, 2, 2, 2],
                    [3, 3, 3, 4, 4, 4],
                    [3, 3, 3, 4, 4, 4],]]]
    """

    def __init__(self, mode, scales):
        """
        Args:
            mode (string): interpolation mode; only `nearest` is supported
                (case-insensitive).
            scales (list[int]): The scale array along each dimension. It takes
                value greater than or equal to 1.
        """
        super(UpSample, self).__init__()
        self.scales = scales
        self.mode = mode.lower()
        assert self.mode == "nearest", "only support nearest mode."

    def forward(self, x):
        """
        Repeat every element of ``x`` ``scale`` times along each axis.
        Args:
            x (CTensor): input tensor.
        Returns:
            CTensor: the upsampled tensor.
        """
        if isinstance(self.scales, np.ndarray):
            self.scales = self.scales.tolist()
        else:
            self.scales = list(self.scales)
        self.x_shape = list(x.shape())
        for axis, scale in enumerate(self.scales):
            # scales are truncated to integers before use
            scale = int(scale)
            if scale == 1:
                continue
            x = x.Repeat([scale], axis)
        return x

    def backward(self, dy):
        """
        Sum the gradient of the repeated copies back onto each input element.
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            CTensor: the gradient over the input, shaped like the input.
        """
        x_shape = self.x_shape.copy()
        for axis, scale, extent in zip(
                range(len(self.scales))[::-1], self.scales[::-1],
                x_shape[::-1]):
            scale = int(scale)
            if scale == 1:
                continue
            pieces = []
            for i in range(extent):
                # input element i produced the `scale` consecutive output
                # slices starting at i * scale
                total = None
                for j in range(scale):
                    begin = i * scale + j
                    part = singa.SliceOn(dy, begin, begin + 1, axis)
                    if total is None:
                        total = part
                    else:
                        total += part
                pieces.append(total)
            dy = singa.ConcatOn(singa.VecTensor(pieces), axis)
        return singa.Reshape(dy, self.x_shape)
| |
| |
def upsample(x, mode, scales):
    """
    Apply the UpSample operator to ``x``.
    Args:
        x (Tensor): input tensor.
        mode (string): interpolation mode passed to ``UpSample``.
        scales (list[int]): The scale array along each dimension. It takes
            value greater than or equal to 1.
    Returns:
        the output Tensor.
    """
    operator = UpSample(mode, scales)
    outputs = operator(x)
    return outputs[0]
| |
| |
class DepthToSpace(Operator):
    """
    DepthToSpace operator following ONNX Operator Schemas
    https://github.com/onnx/onnx/blob/master/docs/Operators.md#DepthToSpace

    Example usage::
        blocksize = 2
        # (1, 8, 2, 3) input tensor
        data = [[[[0., 1., 2.],
                  [3., 4., 5.]],
                 [[9., 10., 11.],
                  [12., 13., 14.]],
                 [[18., 19., 20.],
                  [21., 22., 23.]],
                 [[27., 28., 29.],
                  [30., 31., 32.]],
                 [[36., 37., 38.],
                  [39., 40., 41.]],
                 [[45., 46., 47.],
                  [48., 49., 50.]],
                 [[54., 55., 56.],
                  [57., 58., 59.]],
                 [[63., 64., 65.],
                  [66., 67., 68.]]]]

        # DCR mode
        # (1, 2, 4, 6) output tensor
        output = [[[[0., 18., 1., 19., 2., 20.],
                    [36., 54., 37., 55., 38., 56.],
                    [3., 21., 4., 22., 5., 23.],
                    [39., 57., 40., 58., 41., 59.]],
                   [[9., 27., 10., 28., 11., 29.],
                    [45., 63., 46., 64., 47., 65.],
                    [12., 30., 13., 31., 14., 32.],
                    [48., 66., 49., 67., 50., 68.]]]]

        # CRD mode
        # (1, 2, 4, 6) output tensor
        output = [[[[0., 9., 1., 10., 2., 11.],
                    [18., 27., 19., 28., 20., 29.],
                    [3., 12., 4., 13., 5., 14.],
                    [21., 30., 22., 31., 23., 32.]],
                   [[36., 45., 37., 46., 38., 47.],
                    [54., 63., 55., 64., 56., 65.],
                    [39., 48., 40., 49., 41., 50.],
                    [57., 66., 58., 67., 59., 68.]]]]
    """

    def __init__(self, blocksize, mode="DCR"):
        """
        Args:
            blocksize (int): Blocks of [blocksize, blocksize] are moved.
            mode (string): DCR (default) for depth-column-row order re-
                arrangement. Use CRD for column-row-depth order.
        """
        super(DepthToSpace, self).__init__()
        self.blocksize = blocksize
        self.mode = mode.upper()

    def forward(self, x):
        """
        Move blocks of the channel axis into the two spatial axes.
        Args:
            x (CTensor): 4d input tensor (b, c, h, w).
        Returns:
            CTensor: tensor of shape (b, c / blocksize^2, h * blocksize,
            w * blocksize).
        """
        if training:
            # backward() restores the gradient to this shape
            self.x_shape = x.shape()
        b, c, h, w = x.shape()
        bs = self.blocksize
        mode = self.mode
        if mode == "DCR" or mode == "CRD":
            depth = c // (bs**2)
            if mode == "DCR":
                # the block offsets come before the remaining depth
                split = [b, bs, bs, depth, h, w]
                order = [0, 3, 4, 1, 5, 2]
            else:
                # the remaining depth comes before the block offsets
                split = [b, depth, bs, bs, h, w]
                order = [0, 1, 4, 2, 5, 3]
            x = singa.Reshape(x, split)
            x = singa.Transpose(x, order)
            x = singa.Reshape(x, [b, depth, h * bs, w * bs])
        else:
            assert False, ("only support two methods: DCR and CRD.")
        return x

    def backward(self, dy):
        """
        Apply the inverse rearrangement to the gradient.
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            CTensor: the gradient over the input, shaped like the input.
        """
        b, c, h, w = self.x_shape
        bs = self.blocksize
        dy = singa.Reshape(dy, [b, c // (bs**2), h, bs, w, bs])
        # each permutation is the inverse of the one used in forward()
        if self.mode == "DCR":
            order = [0, 3, 5, 1, 2, 4]
        elif self.mode == "CRD":
            order = [0, 1, 3, 5, 2, 4]
        else:
            order = None
            assert False, ("only support two methods: DCR and CRD.")
        if order is not None:
            dy = singa.Transpose(dy, order)
        return singa.Reshape(dy, self.x_shape)
| |
| |
def depth_to_space(x, blocksize, mode="DCR"):
    """
    Apply the DepthToSpace operator to ``x``.
    Args:
        x (Tensor): input tensor.
        blocksize (int): Blocks of [blocksize, blocksize] are moved.
        mode (string): DCR (default) for depth-column-row order re-
            arrangement. Use CRD for column-row-depth order.
    Returns:
        the output Tensor.
    """
    operator = DepthToSpace(blocksize, mode)
    outputs = operator(x)
    return outputs[0]
| |
| |
class SpaceToDepth(Operator):
    """
    SpaceToDepth operator following ONNX Operator Schemas, reverse of DepthToSpace
    https://github.com/onnx/onnx/blob/master/docs/Operators.md#SpaceToDepth
    """

    def __init__(self, blocksize, mode="DCR"):
        """
        Args:
            blocksize (int): Blocks of [blocksize, blocksize] are moved.
            mode (string): DCR (default) for depth-column-row order re-
                arrangement. Use CRD for column-row-depth order.
        """
        super(SpaceToDepth, self).__init__()
        self.blocksize = blocksize
        self.mode = mode.upper()

    def forward(self, x):
        """
        Move spatial blocks of ``x`` into the channel axis.
        Args:
            x (CTensor): 4d input tensor (b, c, h, w).
        Returns:
            CTensor: tensor of shape (b, c * blocksize^2, h / blocksize,
            w / blocksize).
        """
        blocksize = self.blocksize
        b, c, h, w = x.shape()
        out_shape = (b, c * (blocksize**2), h // blocksize, w // blocksize)
        if training:
            # NOTE: despite its name, x_shape stores the OUTPUT shape;
            # backward() derives the input layout from it
            self.x_shape = out_shape
        x = singa.Reshape(
            x, [b, c, h // blocksize, blocksize, w // blocksize, blocksize])
        if self.mode == "DCR":
            x = singa.Transpose(x, [0, 3, 5, 1, 2, 4])
        elif self.mode == "CRD":
            x = singa.Transpose(x, [0, 1, 3, 5, 2, 4])
        else:
            assert False, ("only support two methods: DCR and CRD.")
        # use the local shape so that inference (training == False), where
        # self.x_shape is never assigned, works as well
        x = singa.Reshape(x, list(out_shape))
        return x

    def backward(self, dy):
        """
        Apply the inverse rearrangement to the gradient.
        Args:
            dy (CTensor): gradient tensor, shaped like the forward output.
        Returns:
            CTensor: the gradient over the input.
        """
        b, c, h, w = self.x_shape
        blocksize = self.blocksize
        if self.mode == "DCR":
            dy = singa.Reshape(
                dy, [b, blocksize, blocksize, c // (blocksize**2), h, w])
            dy = singa.Transpose(dy, [0, 3, 4, 1, 5, 2])
            dy = singa.Reshape(
                dy, [b, c // (blocksize**2), h * blocksize, w * blocksize])
        elif self.mode == "CRD":
            dy = singa.Reshape(
                dy, [b, c // (blocksize**2), blocksize, blocksize, h, w])
            dy = singa.Transpose(dy, [0, 1, 4, 2, 5, 3])
            dy = singa.Reshape(
                dy, [b, c // (blocksize**2), h * blocksize, w * blocksize])
        else:
            assert False, ("only support two methods: DCR and CRD.")
        return dy
| |
| |
def space_to_depth(x, blocksize, mode="DCR"):
    """
    Apply the SpaceToDepth operator to ``x``.
    Args:
        x (Tensor): input tensor.
        blocksize (int): Blocks of [blocksize, blocksize] are moved.
        mode (string): DCR (default) for depth-column-row order re-
            arrangement. Use CRD for column-row-depth order.
    Returns:
        the output Tensor.
    """
    operator = SpaceToDepth(blocksize, mode)
    outputs = operator(x)
    return outputs[0]
| |
| |
class Where(Operator):
    """
    Where operator following ONNX Operator Schemas
    https://github.com/onnx/onnx/blob/master/docs/Operators.md#Where
    and Numpy
    https://numpy.org/doc/stable/reference/generated/numpy.where.html
    Example usage::
        condition = [[True, False],
                     [True, True]]
        x = [[1, 2],
             [3, 4]]
        y = [[9, 8],
             [7, 6]]

        output = [[1, 8],
                  [3, 4]]
    """

    def __init__(self, condition):
        """
        Args:
            condition (Tensor, CTensor, numpy.ndarray or list): When True
                (nonzero), yield X, otherwise yield Y
        """
        super(Where, self).__init__()
        self.condition = condition

    def forward(self, a, b):
        """
        Select from ``a`` where the condition holds and from ``b`` elsewhere,
        computed as ``condition * a + (1 - condition) * b``.
        Args:
            a (CTensor): values used where the condition is nonzero.
            b (CTensor): values used where the condition is zero.
        Returns:
            CTensor: the selected values; cast back to int when either
            input is an int tensor.
        """
        condition = self.condition
        if isinstance(condition, Tensor):
            # unwrap the python Tensor to the underlying CTensor
            condition = condition.data
        else:
            if isinstance(condition, list):
                condition = np.array(condition)
            if isinstance(condition, np.ndarray):
                # booleans become 0./1. so they can be used as a mask
                condition = tensor.from_numpy(condition.astype(np.float32))
                condition.to_device(a.device())
                condition = condition.data
        self.condition = condition
        self.neg_condition = singa.AddFloat(
            singa.MultFloat(self.condition, -1.), 1.)

        # the masking arithmetic runs in float; int inputs are converted
        # and the result is cast back afterwards
        has_int = (a.data_type() == singa.kInt or
                   b.data_type() == singa.kInt)
        if has_int:
            a = a.AsType(singa.kFloat32)
            b = b.AsType(singa.kFloat32)
        res = singa.__add__(singa.__mul__(self.condition, a),
                            singa.__mul__(self.neg_condition, b))
        if has_int:
            res = res.AsType(singa.kInt)
        return res

    def backward(self, dy):
        """
        Route the gradient to whichever input was selected.
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            tuple of CTensor: the gradients over ``a`` and ``b``.
        """
        da = singa.__mul__(self.condition, dy)
        db = singa.__mul__(self.neg_condition, dy)
        return da, db
| |
| |
def where(x, y, condition):
    """
    Apply the Where operator to ``x`` and ``y``.
    Args:
        x (Tensor): input tensor.
        y (Tensor): input tensor.
        condition (Tensor): When True (nonzero), yield X, otherwise yield Y
    Returns:
        the output Tensor.
    """
    operator = Where(condition)
    outputs = operator(x, y)
    return outputs[0]
| |
| |
class Round(Operator):
    """
    Round the input element-wise.
    """

    def __init__(self):
        super(Round, self).__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): input tensor.
        Returns:
            CTensor: the result of ``singa.Round`` on ``x``.
        """
        return singa.Round(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            CTensor: an all-zero tensor with the shape and device of ``dy``.
        """
        zeros = singa.Tensor(dy.shape(), dy.device())
        zeros.SetFloatValue(0.)
        return zeros
| |
| |
def round(x):
    """
    Round the input element-wise.
    Args:
        x (Tensor): input tensor.
    Returns:
        the output Tensor.
    """
    outputs = Round()(x)
    return outputs[0]
| |
| |
class Rounde(Operator):
    """
    Round the input element-wise; halves are rounded to the nearest even
    integer.
    """

    def __init__(self):
        super(Rounde, self).__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): input tensor.
        Returns:
            CTensor: the result of ``singa.RoundE`` on ``x``.
        """
        return singa.RoundE(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            CTensor: an all-zero tensor with the shape and device of ``dy``.
        """
        zeros = singa.Tensor(dy.shape(), dy.device())
        zeros.SetFloatValue(0.)
        return zeros
| |
| |
def rounde(x):
    """
    Round the input element-wise; halves are rounded to the nearest even
    integer.
    Args:
        x (Tensor): input tensor.
    Returns:
        the output Tensor.
    """
    outputs = Rounde()(x)
    return outputs[0]
| |
| |
class Embedding(Operator):
    """
    Init an embedding operator
    """

    def __init__(self):
        super(Embedding, self).__init__()

    def forward(self, x, w):
        """
        forward of embedding: look up one row of ``w`` per index in ``x``.
        Args:
            x (CTensor): 2d tensor of row indices into ``w``.
            w (CTensor): weight tensor.
        Returns:
            the output CTensor, with one extra trailing axis holding the
            looked-up rows.
        """
        x = tensor.to_numpy(tensor.from_raw_tensor(x))
        if training:
            self.cache = (x, w.shape())

        batches = []
        for indices in x.tolist():
            rows = []
            for idx in indices:
                # indices may arrive as floats; list/slice positions must
                # be integers
                idx = int(idx)
                rows.append(singa.SliceOn(w, idx, idx + 1, 0))
            stacked = singa.ConcatOn(singa.VecTensor(rows), 0)
            # add a leading axis so the per-sample results can be joined
            stacked = singa.Reshape(stacked, [1] + list(stacked.shape()))
            batches.append(stacked)
        return singa.ConcatOn(singa.VecTensor(batches), 0)

    def backward(self, dy):
        """
        backward of embedding
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            tuple of CTensor: the gradients over the index tensor and over
            the weight tensor.
        """
        x, w_shape = self.cache
        dy_shape = dy.shape()
        # construct the dx: dy summed over its last axis
        dx = tensor.sum(tensor.from_raw_tensor(dy), axis=2)

        # construct the dw: one zero row per weight row, then add the
        # gradient of every position that looked that row up
        dws = []
        for _ in range(w_shape[0]):
            zero_row = singa.Tensor((1, w_shape[1]), dy.device())
            zero_row.SetFloatValue(0.0)
            dws.append(zero_row)
        dy = singa.Reshape(dy, [dy_shape[0] * dy_shape[1], dy_shape[2]])
        for pos, val in enumerate(x.reshape(-1)):
            # val is a numpy scalar (possibly float), which cannot index a
            # python list directly
            row = int(val)
            dws[row] = singa.__add__(dws[row],
                                     singa.SliceOn(dy, pos, pos + 1, 0))
        return dx.data, singa.ConcatOn(singa.VecTensor(dws), 0)
| |
| |
def embedding(x, w):
    """
    Apply the embedding operator.
    Args:
        x (Tensor): tensor of indices.
        w (Tensor): weight tensor.
    Returns:
        the output Tensor.
    """
    outputs = Embedding()(x, w)
    return outputs[0]
| |
| |
class Erf(Operator):
    """
    Apply element-wise math.erf to the input
    """

    def __init__(self):
        super(Erf, self).__init__()

    def forward(self, x):
        """
        Args:
            x (CTensor): input tensor.
        Returns:
            CTensor: erf applied element-wise to ``x``.
        """
        if training:
            # the derivative depends on the input, so keep it for backward()
            self.input = x
        return singa.Erf(x)

    def backward(self, dy):
        """
        Args:
            dy (CTensor): gradient tensor.
        Returns:
            CTensor: the gradient over the input,
            ``dy * 2 / sqrt(pi) * exp(-x^2)``.
        """
        dx = singa.MultFloat(singa.PowFloat(self.input, 2.0), -1.0)
        dx = singa.MultFloat(singa.Exp(dx), 2. / np.pi**0.5)
        # chain rule: scale the local derivative by the incoming gradient
        return singa.__mul__(dy, dx)
| |
| |
def erf(x):
    """
    Apply math.erf element-wise to the input.
    Args:
        x (Tensor): input tensor.
    Returns:
        the output Tensor.
    """
    outputs = Erf()(x)
    return outputs[0]
| |
| |
# Aliases for Operator and the layer classes.
Operation = Operator

# `layer` is imported at the end of the module to resolve the circular
# import between the two modules.
from singa import layer

Linear = layer.Linear
Conv2d = layer.Conv2d
SeparableConv2d = layer.SeparableConv2d
BatchNorm2d = layer.BatchNorm2d
Pooling2d = layer.Pooling2d
MaxPool2d = layer.MaxPool2d
AvgPool2d = layer.AvgPool2d
MaxPool1d = layer.MaxPool1d
AvgPool1d = layer.AvgPool1d
RNN_Base = layer.RNN_Base
RNN = layer.RNN
LSTM = layer.LSTM