# blob: 98b19c3223a292da8f6ed491bbae8e7296fa38d3 [file] [log] [blame]
"""Random projection layers in MXNet as custom python ops.
Currently slow and memory-inefficient, but functional.
"""
import os
# MXNET_CPU_WORKER_NTHREADS must be greater than 1 for custom op to work on CPU
os.environ["MXNET_CPU_WORKER_NTHREADS"] = "2"
import numpy as np
import mxnet as mx
# ref: http://mxnet.io/how_to/new_op.html
class RandomBagOfWordsProjection(mx.operator.CustomOp):
    """Random projection layer for sparse bag-of-words (n-hot) inputs.

    In the sparse input, only the indices are supplied, because all the
    values are understood to be exactly 1.0.  Negative indices (-1) mark
    padding slots and contribute nothing to the output.

    See also SparseRandomProjection for values other than 1.0.
    """

    def __init__(self, vocab_size, output_dim, random_seed=54321):
        """Draw the fixed random projection matrix.

        Args:
            vocab_size: number of rows in the projection matrix (one per
                possible input index).
            output_dim: number of columns, i.e. the projected dimension.
            random_seed: seed for the NumPy RandomState used to draw the
                matrix, so the projection is reproducible.
        """
        super(RandomBagOfWordsProjection, self).__init__()
        self._vocab = vocab_size
        self._proj_dim = output_dim
        # NOTE: This naive implementation is slow and uses lots of memory.
        # Should use something smarter to not instantiate this matrix.
        rs = np.random.RandomState(seed=random_seed)
        self.W = self.random_unit_vecs(self._vocab, self._proj_dim, rs)

    def random_unit_vecs(self, num_vecs, num_dims, rs):
        """Return a (num_vecs, num_dims) array of unit-length Gaussian rows.

        Args:
            num_vecs: number of row vectors to draw.
            num_dims: length of each row vector.
            rs: np.random.RandomState supplying the normal samples.
        """
        W = rs.normal(size=(num_vecs, num_dims))
        Wlen = np.linalg.norm(W, axis=1)
        # Wlen[:, None] turns the (num_vecs,) norms into a column so each
        # row is divided by its own length.
        return W / Wlen[:, None]

    def _get_mask(self, idx, in_data):
        """Returns the mask by which to multiply the parts of the embedding layer.

        In this version, we have no weights to apply, so the mask is purely
        boolean: False for the -1 (padding) entries that should be removed.

        Args:
            idx: integer index array, shape=(b,mnz).
            in_data: the operator inputs (unused here; subclasses read the
                values from it).

        Returns:
            Array of shape (b,mnz,1).  The trailing axis broadcasts against
            the (b,mnz,d) gathered rows, so the mask never has to be
            repeated d times in memory.
        """
        mask = idx >= 0  # shape=(b,mnz)
        return np.expand_dims(mask, 2)  # shape=(b,mnz,1)

    def forward(self, is_train, req, in_data, out_data, aux):
        """Sum the projection rows selected by each record's indices.

        Writes an array of shape (b,d) into out_data[0].
        """
        # Note: see this run in notebooks/howto-numpy-random-proj.ipynb
        # Notation for shapes: b = batch_size, mnz = max_nonzero, d = proj_dim
        idx = in_data[0].asnumpy().astype('int32')  # shape=(b,mnz)
        # A -1 index wraps around and gathers the last row of W; that row is
        # zeroed out by the mask below.
        wd = self.W[idx]  # shape=(b,mnz,d)
        mask = self._get_mask(idx, in_data)
        wd = np.multiply(wd, mask)  # shape=(b,mnz,d), padding rows zeroed
        y = np.sum(wd, axis=1)  # shape=(b,d)
        # NOTE: this hangs if the environment variables aren't set correctly
        # See https://github.com/dmlc/mxnet/issues/3813
        mxy = mx.nd.array(y)
        self.assign(out_data[0], req[0], mxy)
@mx.operator.register("SparseBOWProj")
class RandomBagOfWordsProjectionProp(mx.operator.CustomOpProp):
    """Operator properties for RandomBagOfWordsProjection.

    Registered under the op_type "SparseBOWProj".
    """

    def __init__(self, vocab_size, output_dim, random_seed=54321):
        """Record the constructor arguments for the operator.

        Args:
            vocab_size: number of distinct input indices.
            output_dim: dimension of the projected output.
            random_seed: seed for the projection matrix; the default matches
                the operator's own default, so existing callers get the
                same matrix as before.
        """
        # need_top_grad=True means this is not a loss layer
        super(RandomBagOfWordsProjectionProp, self).__init__(need_top_grad=True)
        # Coerce with int() -- presumably the arguments can arrive as strings
        # from the symbol API; confirm against the MXNet custom-op docs.
        self._kwargs = {
            'vocab_size': int(vocab_size),
            'output_dim': int(output_dim),
            'random_seed': int(random_seed),
        }

    def list_arguments(self):
        """Name the single input: the non-zero indices."""
        return ['indexes']

    def list_outputs(self):
        """Name the single output."""
        return ['output']

    def create_operator(self, ctx, shapes, dtypes, **kwargs):
        """Instantiate the operator with the stored constructor arguments."""
        return RandomBagOfWordsProjection(**self._kwargs)

    def infer_shape(self, in_shape):
        """Output shape is (batch_size, output_dim); there are no aux states."""
        batch_size = in_shape[0][0]
        output_shape = (batch_size, self._kwargs['output_dim'])
        return in_shape, [output_shape], []
class SparseRandomProjection(RandomBagOfWordsProjection):
    """Random projection of a sparse input vector.

    Takes a sparse input layer, effectively in coordinate (COO) format,
    where the row number is implicit, because it's the minibatch record.
    See the simpler version RandomBagOfWordsProjection if all values are 1.0.
    """

    def _get_mask(self, idx, in_data):
        """Returns the mask by which to multiply the parts of the embedding layer.

        In this version, we apply the weights: each valid slot is scaled by
        its value, and each padding slot (negative index) is scaled by 0.

        Args:
            idx: integer index array, shape=(b,mnz).
            in_data: the operator inputs; in_data[1] holds the values,
                shape=(b,mnz).

        Returns:
            Array of shape (b,mnz,1).  The trailing axis broadcasts against
            the (b,mnz,d) gathered rows, so the weights never have to be
            repeated d times in memory.
        """
        val = in_data[1].asnumpy()  # shape=(b,mnz)
        # np.where (rather than bool * val) guarantees padded slots are
        # exactly 0 even when the value stored there is NaN or inf.
        weights = np.where(idx >= 0, val, 0)  # shape=(b,mnz)
        return np.expand_dims(weights, 2)  # shape=(b,mnz,1)
@mx.operator.register("SparseRandomProjection")
class SparseRandomProjectionProp(RandomBagOfWordsProjectionProp):
    """Operator properties for SparseRandomProjection.

    Same as the parent properties, except that the operator takes a second
    input carrying the value of each non-zero entry.
    """

    def list_arguments(self):
        """Name the two inputs: the indices and their matching values."""
        return ['indexes', 'values']

    def create_operator(self, ctx, shapes, dtypes, **kwargs):
        """Instantiate the weighted operator with the stored arguments."""
        return SparseRandomProjection(**self._kwargs)

    def infer_shape(self, in_shape):
        """Require equal index/value shapes, then defer to the parent.

        Raises:
            ValueError: if the indexes and values shapes are not the same.
        """
        idx_shape = in_shape[0]
        val_shape = in_shape[1]
        if idx_shape == val_shape:
            return super(SparseRandomProjectionProp, self).infer_shape(in_shape)
        raise ValueError("Input shapes differ. indexes:%s. values:%s. must be same"
                         % (str(idx_shape), str(val_shape)))
if __name__ == "__main__":
    # Smoke test: project a tiny batch whose indices are all 0 and whose
    # values are all 1, then print the result.
    print("Simple test of proj layer")
    batch_shape = (3, 100)
    index_sym = mx.symbol.Variable('data')
    value_sym = mx.symbol.Variable('vals')
    proj_sym = mx.symbol.Custom(
        indexes=index_sym, values=value_sym, name='rproj',
        op_type='SparseRandomProjection',
        vocab_size=999, output_dim=29)
    bindings = {
        'data': mx.nd.zeros(shape=batch_shape),
        'vals': mx.nd.ones(shape=batch_shape),
    }
    executor = proj_sym.bind(ctx=mx.cpu(), args=bindings)
    executor.forward()
    print(executor.outputs[0].asnumpy())
    print("Done with proj layer test")