# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import mxnet as mx
import random
from mxnet.io import DataBatch, DataIter
import numpy as np
def add_data_args(parser):
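    """Add options describing the input data to ``parser`` and return the argument group.

    Covers the training/validation record files and their indexes, the RGB
    mean/std used for normalization, padding, the input image shape, the number
    of classes and examples, the number of decoding threads, and the
    --benchmark switch that replaces real data with synthetic data.
    """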
    data = parser.add_argument_group('Data', 'the input images')
    data.add_argument('--data-train', type=str, help='the training data')
    data.add_argument('--data-train-idx', type=str, default='', help='the index of training data')
    data.add_argument('--data-val', type=str, help='the validation data')
    data.add_argument('--data-val-idx', type=str, default='', help='the index of validation data')
    data.add_argument('--rgb-mean', type=str, default='123.68,116.779,103.939',
                      help='a tuple of size 3 for the mean rgb')
    data.add_argument('--rgb-std', type=str, default='1,1,1',
                      help='a tuple of size 3 for the std rgb')
    data.add_argument('--pad-size', type=int, default=0,
                      help='size of the padding applied to the input image')
    data.add_argument('--fill-value', type=int, default=127,
                      help='the pixel value used to fill the padded area')
    data.add_argument('--image-shape', type=str,
                      help='the image shape fed into the network, e.g. (3,224,224)')
    data.add_argument('--num-classes', type=int, help='the number of classes')
    data.add_argument('--num-examples', type=int, help='the number of training examples')
    data.add_argument('--data-nthreads', type=int, default=4,
                      help='number of threads for data decoding')
    data.add_argument('--benchmark', type=int, default=0,
                      help='if 1, then feed the network with synthetic data')
    return data

def add_data_aug_args(parser):
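    """Add data-augmentation options to ``parser`` and return the argument group.

    The augmentations (random crop, mirror, HSL jitter, aspect-ratio, rotation,
    shear, scale and area ranges, random resized crop, brightness/contrast/
    saturation jitter and PCA noise) are implemented in
    src/io/image_aug_default.cc.
    """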
    aug = parser.add_argument_group(
        'Image augmentations', 'implemented in src/io/image_aug_default.cc')
    aug.add_argument('--random-crop', type=int, default=0,
                     help='whether to randomly crop the image')
    aug.add_argument('--random-mirror', type=int, default=0,
                     help='whether to randomly flip the image horizontally')
    aug.add_argument('--max-random-h', type=int, default=0,
                     help='max change of hue, whose range is [0, 180]')
    aug.add_argument('--max-random-s', type=int, default=0,
                     help='max change of saturation, whose range is [0, 255]')
    aug.add_argument('--max-random-l', type=int, default=0,
                     help='max change of intensity, whose range is [0, 255]')
    aug.add_argument('--min-random-aspect-ratio', type=float, default=None,
                     help='min value of aspect ratio, whose value is either None or a positive value.')
    aug.add_argument('--max-random-aspect-ratio', type=float, default=0,
                     help='max value of aspect ratio. If min_random_aspect_ratio is None, '
                          'the aspect ratio range is [1-max_random_aspect_ratio, '
                          '1+max_random_aspect_ratio], otherwise it is '
                          '[min_random_aspect_ratio, max_random_aspect_ratio].')
    aug.add_argument('--max-random-rotate-angle', type=int, default=0,
                     help='max angle to rotate, whose range is [0, 360]')
    aug.add_argument('--max-random-shear-ratio', type=float, default=0,
                     help='max ratio to shear, whose range is [0, 1]')
    aug.add_argument('--max-random-scale', type=float, default=1,
                     help='max ratio to scale')
    aug.add_argument('--min-random-scale', type=float, default=1,
                     help='min ratio to scale; should be >= img_size/input_shape, '
                          'otherwise use --pad-size')
    aug.add_argument('--max-random-area', type=float, default=1,
                     help='max area to crop in random resized crop, whose range is [0, 1]')
    aug.add_argument('--min-random-area', type=float, default=1,
                     help='min area to crop in random resized crop, whose range is [0, 1]')
    aug.add_argument('--min-crop-size', type=int, default=-1,
                     help='crop both width and height into a random size in '
                          '[min_crop_size, max_crop_size]')
    aug.add_argument('--max-crop-size', type=int, default=-1,
                     help='crop both width and height into a random size in '
                          '[min_crop_size, max_crop_size]')
    aug.add_argument('--brightness', type=float, default=0,
                     help='brightness jittering, whose range is [0, 1]')
    aug.add_argument('--contrast', type=float, default=0,
                     help='contrast jittering, whose range is [0, 1]')
    aug.add_argument('--saturation', type=float, default=0,
                     help='saturation jittering, whose range is [0, 1]')
    aug.add_argument('--pca-noise', type=float, default=0,
                     help='pca noise, whose range is [0, 1]')
    aug.add_argument('--random-resized-crop', type=int, default=0,
                     help='whether to use random resized crop')
    return aug

class SyntheticDataIter(DataIter):
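    """An iterator that repeatedly yields one fixed batch of random data.

    A single random batch and label array are generated once, kept in pinned
    CPU memory, and returned ``max_iter`` times between resets, so the network
    can be benchmarked without any disk I/O or image decoding.
    """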
    def __init__(self, num_classes, data_shape, max_iter, dtype):
        self.batch_size = data_shape[0]
        self.cur_iter = 0
        self.max_iter = max_iter
        self.dtype = dtype
        label = np.random.randint(0, num_classes, [self.batch_size,])
        data = np.random.uniform(-1, 1, data_shape)
        self.data = mx.nd.array(data, dtype=self.dtype, ctx=mx.Context('cpu_pinned', 0))
        self.label = mx.nd.array(label, dtype=self.dtype, ctx=mx.Context('cpu_pinned', 0))

    def __iter__(self):
        return self

    @property
    def provide_data(self):
        return [mx.io.DataDesc('data', self.data.shape, self.dtype)]

    @property
    def provide_label(self):
        return [mx.io.DataDesc('softmax_label', (self.batch_size,), self.dtype)]

    def next(self):
        self.cur_iter += 1
        if self.cur_iter <= self.max_iter:
            return DataBatch(data=(self.data,),
                             label=(self.label,),
                             pad=0,
                             index=None,
                             provide_data=self.provide_data,
                             provide_label=self.provide_label)
        else:
            raise StopIteration

    def __next__(self):
        return self.next()

    def reset(self):
        self.cur_iter = 0

def get_rec_iter(args, kv=None):
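    """Build the training and validation iterators described by ``args``.

    Returns a ``(train, val)`` tuple; ``val`` is ``None`` when --data-val is
    not given. If --benchmark is set, a SyntheticDataIter is returned instead
    of reading the record files. ``kv`` is an optional kvstore whose rank and
    number of workers are used to shard the data in distributed training.
    """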
    image_shape = tuple([int(l) for l in args.image_shape.split(',')])
    if 'benchmark' in args and args.benchmark:
        # benchmark mode: feed the network from a fixed synthetic batch
        data_shape = (args.batch_size,) + image_shape
        train = SyntheticDataIter(args.num_classes, data_shape,
                                  args.num_examples // args.batch_size, np.float32)
        return (train, None)
    if kv:
        (rank, nworker) = (kv.rank, kv.num_workers)
    else:
        (rank, nworker) = (0, 1)
    rgb_mean = [float(i) for i in args.rgb_mean.split(',')]
    rgb_std = [float(i) for i in args.rgb_std.split(',')]
    train = mx.io.ImageRecordIter(
        path_imgrec = args.data_train,
        path_imgidx = args.data_train_idx,
        label_width = 1,
        mean_r = rgb_mean[0],
        mean_g = rgb_mean[1],
        mean_b = rgb_mean[2],
        std_r = rgb_std[0],
        std_g = rgb_std[1],
        std_b = rgb_std[2],
        data_name = 'data',
        label_name = 'softmax_label',
        data_shape = image_shape,
        batch_size = args.batch_size,
        rand_crop = args.random_crop,
        max_random_scale = args.max_random_scale,
        pad = args.pad_size,
        fill_value = args.fill_value,
        random_resized_crop = args.random_resized_crop,
        min_random_scale = args.min_random_scale,
        max_aspect_ratio = args.max_random_aspect_ratio,
        min_aspect_ratio = args.min_random_aspect_ratio,
        max_random_area = args.max_random_area,
        min_random_area = args.min_random_area,
        min_crop_size = args.min_crop_size,
        max_crop_size = args.max_crop_size,
        brightness = args.brightness,
        contrast = args.contrast,
        saturation = args.saturation,
        pca_noise = args.pca_noise,
        random_h = args.max_random_h,
        random_s = args.max_random_s,
        random_l = args.max_random_l,
        max_rotate_angle = args.max_random_rotate_angle,
        max_shear_ratio = args.max_random_shear_ratio,
        rand_mirror = args.random_mirror,
        preprocess_threads = args.data_nthreads,
        shuffle = True,
        num_parts = nworker,
        part_index = rank)
    if args.data_val is None:
        return (train, None)
    val = mx.io.ImageRecordIter(
        path_imgrec = args.data_val,
        path_imgidx = args.data_val_idx,
        label_width = 1,
        mean_r = rgb_mean[0],
        mean_g = rgb_mean[1],
        mean_b = rgb_mean[2],
        std_r = rgb_std[0],
        std_g = rgb_std[1],
        std_b = rgb_std[2],
        resize = 256,
        data_name = 'data',
        label_name = 'softmax_label',
        batch_size = args.batch_size,
        data_shape = image_shape,
        preprocess_threads = args.data_nthreads,
        rand_crop = False,
        rand_mirror = False,
        num_parts = nworker,
        part_index = rank)
    return (train, val)
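
# A minimal usage sketch (illustrative only; the file names and values below are
# assumptions, and batch_size is not defined by add_data_args, so it must be set
# on the namespace before calling get_rec_iter):
#
#   import argparse
#   parser = argparse.ArgumentParser()
#   add_data_args(parser)
#   add_data_aug_args(parser)
#   args = parser.parse_args(['--data-train', 'train.rec',
#                             '--image-shape', '3,224,224',
#                             '--num-classes', '1000',
#                             '--num-examples', '1281167'])
#   args.batch_size = 128
#   train, val = get_rec_iter(args)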