example/nce-loss/random_data.py - mxnet - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 # pylint: disable=missing-docstring
 from __future__ import print_function

 import random

 import mxnet as mx
 import numpy as np


 class SimpleBatch(object):
     """Minimal batch container pairing arrays with their names.

     ``data`` and ``label`` are stored exactly as given.  ``provide_data``
     and ``provide_label`` report one ``(name, shape)`` tuple per array.
     """

     def __init__(self, data_names, data, label_names, label):
         self.data_names = data_names
         self.label_names = label_names
         self.data = data
         self.label = label

     @staticmethod
     def _describe(names, arrays):
         """Pair every name with the ``shape`` of its array."""
         described = []
         for name, array in zip(names, arrays):
             described.append((name, array.shape))
         return described

     @property
     def provide_data(self):
         """List of ``(name, shape)`` tuples for the data arrays."""
         return self._describe(self.data_names, self.data)

     @property
     def provide_label(self):
         """List of ``(name, shape)`` tuples for the label arrays."""
         return self._describe(self.label_names, self.label)


 class DataIterSoftmax(mx.io.DataIter):
     """Iterator over randomly generated batches with one label per sample.

     Every sample is a ``feature_size``-long vector of zeros in which three
     distinct, randomly chosen positions are set to 1.0.  Its label is
     computed from those three positions and reduced modulo ``vocab_size``.

     Parameters
     ----------
     count : int
         Number of samples per pass; ``count // batch_size`` batches are
         produced by each iteration.
     batch_size : int
         Number of samples in each batch.
     vocab_size : int
         Modulus applied to the computed label.
     num_label : int
         Stored on the instance; not used when generating samples here.
     feature_size : int
         Length of each feature vector.  Must be at least 3 to draw samples.
     """

     def __init__(self, count, batch_size, vocab_size, num_label, feature_size):
         super(DataIterSoftmax, self).__init__()
         self.batch_size = batch_size
         self.count = count
         self.vocab_size = vocab_size
         self.num_label = num_label
         self.feature_size = feature_size
         self.provide_data = [('data', (batch_size, feature_size))]
         self.provide_label = [('label', (self.batch_size,))]

     def mock_sample(self):
         """Draw one random ``(features, label)`` pair.

         Returns
         -------
         tuple
             ``(features, label)`` where ``features`` is a numpy vector of
             length ``feature_size`` with three entries equal to 1.0, and
             ``label`` is an integer derived from the indices of those
             entries, taken modulo ``vocab_size``.

         Raises
         ------
         ValueError
             If ``feature_size`` is smaller than 3.
         """
         if self.feature_size < 3:
             # Three distinct indices cannot be drawn from fewer than three
             # positions; without this check the loop below never terminates.
             raise ValueError(
                 'feature_size must be at least 3, got %r' % (self.feature_size,))
         ret = np.zeros(self.feature_size)
         active = set()
         while len(active) < 3:
             active.add(random.randint(0, self.feature_size - 1))
         code = 0
         # Fold the indices in ascending order so that equal feature vectors
         # always get the same label; set iteration order is not guaranteed.
         for k in sorted(active):
             ret[k] = 1.0
             code = code * self.feature_size + k
         return ret, code % self.vocab_size

     def __iter__(self):
         """Yield ``count // batch_size`` freshly generated ``SimpleBatch`` objects."""
         for _ in range(self.count // self.batch_size):
             samples = [self.mock_sample() for _ in range(self.batch_size)]
             data_all = [mx.nd.array([features for features, _ in samples])]
             label_all = [mx.nd.array([target for _, target in samples])]
             yield SimpleBatch(['data'], data_all, ['label'], label_all)

     def reset(self):
         """Do nothing: each call to ``__iter__`` starts a new generator."""


 class DataIterNce(mx.io.DataIter):
     """Iterator over randomly generated batches for NCE-style training.

     Every sample is a ``feature_size``-long vector of zeros in which three
     distinct, randomly chosen positions are set to 1.0.  Each sample comes
     with ``num_label`` candidate labels: the first is computed from the
     three positions (modulo ``vocab_size``), the rest are drawn uniformly
     at random.  The accompanying ``label_weight`` row is 1.0 for the first
     candidate and 0.0 for the others.

     Parameters
     ----------
     count : int
         Number of samples per pass; ``count // batch_size`` batches are
         produced by each iteration.
     batch_size : int
         Number of samples in each batch.
     vocab_size : int
         Modulus applied to the computed label and exclusive upper bound of
         the randomly drawn labels.
     num_label : int
         Number of candidate labels per sample (computed one included).
     feature_size : int
         Length of each feature vector.  Must be at least 3 to draw samples.
     """

     def __init__(self, count, batch_size, vocab_size, num_label, feature_size):
         super(DataIterNce, self).__init__()
         self.batch_size = batch_size
         self.count = count
         self.vocab_size = vocab_size
         self.num_label = num_label
         self.feature_size = feature_size
         self.provide_data = [('data', (batch_size, feature_size))]
         self.provide_label = [('label', (self.batch_size, num_label)),
                               ('label_weight', (self.batch_size, num_label))]

     def mock_sample(self):
         """Draw one random ``(features, labels)`` pair.

         Returns
         -------
         tuple
             ``(features, labels)`` where ``features`` is a numpy vector of
             length ``feature_size`` with three entries equal to 1.0, and
             ``labels`` is a list whose first element is derived from the
             indices of those entries (modulo ``vocab_size``), followed by
             ``num_label - 1`` random integers in ``[0, vocab_size - 1]``.

         Raises
         ------
         ValueError
             If ``feature_size`` is smaller than 3.
         """
         if self.feature_size < 3:
             # Three distinct indices cannot be drawn from fewer than three
             # positions; without this check the loop below never terminates.
             raise ValueError(
                 'feature_size must be at least 3, got %r' % (self.feature_size,))
         ret = np.zeros(self.feature_size)
         active = set()
         while len(active) < 3:
             active.add(random.randint(0, self.feature_size - 1))
         code = 0
         # Fold the indices in ascending order so that equal feature vectors
         # always get the same first label; set iteration order is not
         # guaranteed.
         for k in sorted(active):
             ret[k] = 1.0
             code = code * self.feature_size + k
         extra = [random.randint(0, self.vocab_size - 1)
                  for _ in range(self.num_label - 1)]
         return ret, [code % self.vocab_size] + extra

     def __iter__(self):
         """Yield ``count // batch_size`` freshly generated ``SimpleBatch`` objects."""
         # The weight row is the same for every sample: only the first
         # candidate label carries weight.
         weight_row = [1.0] + [0.0] * (self.num_label - 1)
         for _ in range(self.count // self.batch_size):
             samples = [self.mock_sample() for _ in range(self.batch_size)]
             data_all = [mx.nd.array([features for features, _ in samples])]
             label_all = [mx.nd.array([labels for _, labels in samples]),
                          mx.nd.array([weight_row] * self.batch_size)]
             yield SimpleBatch(['data'], data_all,
                               ['label', 'label_weight'], label_all)

     def reset(self):
         """Do nothing: each call to ``__iter__`` starts a new generator."""
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	# pylint: disable=missing-docstring
	from __future__ import print_function

	import random

	import mxnet as mx
	import numpy as np


	class SimpleBatch(object):
	    """Lightweight batch object holding named data and label arrays.

	    The arrays are kept as passed in.  ``provide_data`` and
	    ``provide_label`` list a ``(name, shape)`` tuple for each array.
	    """

	    def __init__(self, data_names, data, label_names, label):
	        self.data = data
	        self.label = label
	        self.data_names = data_names
	        self.label_names = label_names

	    @property
	    def provide_data(self):
	        """Return ``(name, shape)`` for every data array."""
	        return [(n, x.shape) for n, x in zip(self.data_names, self.data)]

	    @property
	    def provide_label(self):
	        """Return ``(name, shape)`` for every label array."""
	        return [(n, x.shape) for n, x in zip(self.label_names, self.label)]


	class DataIterSoftmax(mx.io.DataIter):
	    """Iterator over randomly generated batches with one label per sample.

	    Every sample is a ``feature_size``-long vector of zeros in which three
	    distinct, randomly chosen positions are set to 1.0.  Its label is
	    computed from those three positions and reduced modulo ``vocab_size``.

	    Parameters
	    ----------
	    count : int
	        Number of samples per pass; ``count // batch_size`` batches are
	        produced by each iteration.
	    batch_size : int
	        Number of samples in each batch.
	    vocab_size : int
	        Modulus applied to the computed label.
	    num_label : int
	        Stored on the instance; not used when generating samples here.
	    feature_size : int
	        Length of each feature vector.  Must be at least 3 to draw samples.
	    """

	    def __init__(self, count, batch_size, vocab_size, num_label, feature_size):
	        super(DataIterSoftmax, self).__init__()
	        self.batch_size = batch_size
	        self.count = count
	        self.vocab_size = vocab_size
	        self.num_label = num_label
	        self.feature_size = feature_size
	        self.provide_data = [('data', (batch_size, feature_size))]
	        self.provide_label = [('label', (self.batch_size,))]

	    def mock_sample(self):
	        """Draw one random ``(features, label)`` pair.

	        Returns
	        -------
	        tuple
	            ``(features, label)`` where ``features`` is a numpy vector of
	            length ``feature_size`` with three entries equal to 1.0, and
	            ``label`` is an integer derived from the indices of those
	            entries, taken modulo ``vocab_size``.

	        Raises
	        ------
	        ValueError
	            If ``feature_size`` is smaller than 3.
	        """
	        if self.feature_size < 3:
	            # Three distinct indices cannot be drawn from fewer than three
	            # positions; without this check the loop below never terminates.
	            raise ValueError(
	                'feature_size must be at least 3, got %r' % (self.feature_size,))
	        ret = np.zeros(self.feature_size)
	        active = set()
	        while len(active) < 3:
	            active.add(random.randint(0, self.feature_size - 1))
	        code = 0
	        # Fold the indices in ascending order so that equal feature vectors
	        # always get the same label; set iteration order is not guaranteed.
	        for k in sorted(active):
	            ret[k] = 1.0
	            code = code * self.feature_size + k
	        return ret, code % self.vocab_size

	    def __iter__(self):
	        """Yield ``count // batch_size`` freshly generated ``SimpleBatch`` objects."""
	        for _ in range(self.count // self.batch_size):
	            samples = [self.mock_sample() for _ in range(self.batch_size)]
	            data_all = [mx.nd.array([features for features, _ in samples])]
	            label_all = [mx.nd.array([target for _, target in samples])]
	            yield SimpleBatch(['data'], data_all, ['label'], label_all)

	    def reset(self):
	        """Do nothing: each call to ``__iter__`` starts a new generator."""


	class DataIterNce(mx.io.DataIter):
	    """Iterator over randomly generated batches for NCE-style training.

	    Every sample is a ``feature_size``-long vector of zeros in which three
	    distinct, randomly chosen positions are set to 1.0.  Each sample comes
	    with ``num_label`` candidate labels: the first is computed from the
	    three positions (modulo ``vocab_size``), the rest are drawn uniformly
	    at random.  The accompanying ``label_weight`` row is 1.0 for the first
	    candidate and 0.0 for the others.

	    Parameters
	    ----------
	    count : int
	        Number of samples per pass; ``count // batch_size`` batches are
	        produced by each iteration.
	    batch_size : int
	        Number of samples in each batch.
	    vocab_size : int
	        Modulus applied to the computed label and exclusive upper bound of
	        the randomly drawn labels.
	    num_label : int
	        Number of candidate labels per sample (computed one included).
	    feature_size : int
	        Length of each feature vector.  Must be at least 3 to draw samples.
	    """

	    def __init__(self, count, batch_size, vocab_size, num_label, feature_size):
	        super(DataIterNce, self).__init__()
	        self.batch_size = batch_size
	        self.count = count
	        self.vocab_size = vocab_size
	        self.num_label = num_label
	        self.feature_size = feature_size
	        self.provide_data = [('data', (batch_size, feature_size))]
	        self.provide_label = [('label', (self.batch_size, num_label)),
	                              ('label_weight', (self.batch_size, num_label))]

	    def mock_sample(self):
	        """Draw one random ``(features, labels)`` pair.

	        Returns
	        -------
	        tuple
	            ``(features, labels)`` where ``features`` is a numpy vector of
	            length ``feature_size`` with three entries equal to 1.0, and
	            ``labels`` is a list whose first element is derived from the
	            indices of those entries (modulo ``vocab_size``), followed by
	            ``num_label - 1`` random integers in ``[0, vocab_size - 1]``.

	        Raises
	        ------
	        ValueError
	            If ``feature_size`` is smaller than 3.
	        """
	        if self.feature_size < 3:
	            # Three distinct indices cannot be drawn from fewer than three
	            # positions; without this check the loop below never terminates.
	            raise ValueError(
	                'feature_size must be at least 3, got %r' % (self.feature_size,))
	        ret = np.zeros(self.feature_size)
	        active = set()
	        while len(active) < 3:
	            active.add(random.randint(0, self.feature_size - 1))
	        code = 0
	        # Fold the indices in ascending order so that equal feature vectors
	        # always get the same first label; set iteration order is not
	        # guaranteed.
	        for k in sorted(active):
	            ret[k] = 1.0
	            code = code * self.feature_size + k
	        extra = [random.randint(0, self.vocab_size - 1)
	                 for _ in range(self.num_label - 1)]
	        return ret, [code % self.vocab_size] + extra

	    def __iter__(self):
	        """Yield ``count // batch_size`` freshly generated ``SimpleBatch`` objects."""
	        # The weight row is the same for every sample: only the first
	        # candidate label carries weight.
	        weight_row = [1.0] + [0.0] * (self.num_label - 1)
	        for _ in range(self.count // self.batch_size):
	            samples = [self.mock_sample() for _ in range(self.batch_size)]
	            data_all = [mx.nd.array([features for features, _ in samples])]
	            label_all = [mx.nd.array([labels for _, labels in samples]),
	                         mx.nd.array([weight_row] * self.batch_size)]
	            yield SimpleBatch(['data'], data_all,
	                              ['label', 'label_weight'], label_all)

	    def reset(self):
	        """Do nothing: each call to ``__iter__`` starts a new generator."""