example/gluon/embedding_learning/data.py - mxnet - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import os
 import random

 import numpy as np

 import mxnet as mx
 from mxnet import nd

 def transform(data, target_wd, target_ht, is_train, box, crop_size=(224, 224)):
     """Crop, resize, normalize and augment an image NDArray.

     Parameters
     ----------
     data : NDArray
         Image indexed as ``data[row, column, channel]``. Values are divided
         by 255, so 0-255 pixel intensities are assumed (TODO confirm).
     target_wd : int
         Width the image is resized to before the final crop.
     target_ht : int
         Height the image is resized to before the final crop.
     is_train : bool
         If True, flip the image along axis 1 with probability 0.5 and take a
         random crop. Otherwise take a center crop.
     box : sequence of four ints or None
         Bounding box ``(x, y, w, h)`` used to crop the image first. The box
         is clipped to the image extent. Pass None to skip this crop.
     crop_size : tuple of (int, int), optional
         Size handed to ``mx.image.random_crop`` / ``mx.image.center_crop``.
         Defaults to (224, 224), the value that used to be hard-coded.

     Returns
     -------
     NDArray
         The transformed image, channel axis first, with a leading axis of
         length 1 so that results can be concatenated into a batch.
     """
     if box is not None:
         x, y, w, h = box
         # Clip the far edges so a box that overshoots the image is still valid.
         row_end = min(y + h, data.shape[0])
         col_end = min(x + w, data.shape[1])
         data = data[y:row_end, x:col_end]

     # Resize to target_wd * target_ht.
     data = mx.image.imresize(data, target_wd, target_ht)

     # Normalize in the same way as the pre-trained model.
     data = data.astype(np.float32) / 255.0
     mean = mx.nd.array([0.485, 0.456, 0.406])
     std = mx.nd.array([0.229, 0.224, 0.225])
     data = (data - mean) / std

     if is_train:
         if random.random() < 0.5:
             data = nd.flip(data, axis=1)
         data, _ = mx.image.random_crop(data, crop_size)
     else:
         data, _ = mx.image.center_crop(data, crop_size)

     # Move the channel axis to the front: (rows, columns, channels)
     # becomes (channels, rows, columns).
     data = nd.transpose(data, (2, 0, 1))

     # If image is greyscale, repeat 3 times to get RGB image.
     if data.shape[0] == 1:
         data = nd.tile(data, (3, 1, 1))
     return data.reshape((1,) + data.shape)


 class CUB200Iter(mx.io.DataIter):
     """Iterator for the CUB200-2011 dataset.

     Parameters
     ----------
     data_path : str,
         The path to dataset directory.
     batch_k : int,
         Number of images per class in a batch.
     batch_size : int,
         Batch size.
     data_shape : tuple,
         Data shape. E.g. (3, 224, 224).
     is_train : bool,
         Training data or testing data. Training batches are randomly sampled.
         Testing batches are loaded sequentially until reaching the end.
     """

     # Following "Deep Metric Learning via Lifted Structured Feature Embedding" paper,
     # we use the first 100 classes for training, and the remaining for testing.
     NUM_TRAIN_CLASSES = 100

     def __init__(self, data_path, batch_k, batch_size, data_shape, is_train):
         """Index image files, labels and bounding boxes found under data_path.

         Reads ``images.txt``, ``image_class_labels.txt`` and
         ``bounding_boxes.txt`` in lockstep, one line of each per image.
         """
         super(CUB200Iter, self).__init__(batch_size)
         self.data_shape = (batch_size,) + data_shape
         self.batch_size = batch_size
         self.provide_data = [('data', self.data_shape)]
         self.batch_k = batch_k
         self.is_train = is_train

         # One list of file names per training class, indexed by zero-based label.
         self.train_image_files = [[] for _ in range(self.NUM_TRAIN_CLASSES)]
         self.test_image_files = []
         self.test_labels = []
         # Maps image file name to its [x, y, w, h] bounding box.
         self.boxes = {}
         # Number of test batches served since the last reset.
         self.test_count = 0

         with open(os.path.join(data_path, 'images.txt'), 'r') as f_img, \
              open(os.path.join(data_path, 'image_class_labels.txt'), 'r') as f_label, \
              open(os.path.join(data_path, 'bounding_boxes.txt'), 'r') as f_box:
             for line_img, line_label, line_box in zip(f_img, f_label, f_box):
                 fname = os.path.join(data_path, 'images', line_img.strip().split()[-1])
                 # Labels in the file are one-based; store them zero-based.
                 label = int(line_label.strip().split()[-1]) - 1
                 # The last four fields are the box; truncate them to ints.
                 box = [int(float(v)) for v in line_box.split()[-4:]]
                 self.boxes[fname] = box

                 if label < self.NUM_TRAIN_CLASSES:
                     self.train_image_files[label].append(fname)
                 else:
                     self.test_labels.append(label)
                     self.test_image_files.append(fname)

         self.n_test = len(self.test_image_files)

     def get_image(self, img, is_train):
         """Load the image file img and transform it using its bounding box."""
         img_arr = mx.image.imread(img)
         img_arr = transform(img_arr, 256, 256, is_train, self.boxes[img])
         return img_arr

     def sample_train_batch(self):
         """Sample a training batch (data and label).

         Draws ``batch_size // batch_k`` distinct training classes and
         ``batch_k`` distinct images from each, so the batch holds fewer than
         ``batch_size`` samples when ``batch_size`` is not a multiple of
         ``batch_k``.

         Returns
         -------
         tuple
             The concatenated image NDArray and the list of class labels.
         """
         batch = []
         labels = []
         num_groups = self.batch_size // self.batch_k

         sampled_classes = np.random.choice(self.NUM_TRAIN_CLASSES, num_groups, replace=False)
         for cls in sampled_classes:
             img_fnames = np.random.choice(self.train_image_files[cls],
                                           self.batch_k, replace=False)
             batch += [self.get_image(img_fname, is_train=True) for img_fname in img_fnames]
             labels += [cls] * self.batch_k

         return nd.concatenate(batch, axis=0), labels

     def get_test_batch(self):
         """Load the next sequential testing batch (data and label).

         Indices wrap around to the start of the test set, so the final batch
         is always full and repeats leading samples when the test set size is
         not a multiple of the batch size.
         """
         n_test = len(self.test_image_files)
         start = self.test_count * self.batch_size
         indices = [(start + i) % n_test for i in range(self.batch_size)]
         batch = [self.get_image(self.test_image_files[idx], is_train=False)
                  for idx in indices]
         labels = [self.test_labels[idx] for idx in indices]
         return nd.concatenate(batch, axis=0), labels

     def reset(self):
         """Reset an iterator."""
         self.test_count = 0

     def next(self):
         """Return a batch.

         Training iterators never stop. Testing iterators raise StopIteration
         once every test image has been served, and rewind themselves. Test
         batches set ``pad`` to the number of trailing wrap-around samples.
         """
         if self.is_train:
             data, labels = self.sample_train_batch()
             return mx.io.DataBatch(data=[data], label=[labels])

         n_test = len(self.test_image_files)
         start = self.test_count * self.batch_size
         if start >= n_test:
             self.test_count = 0
             raise StopIteration

         data, labels = self.get_test_batch()
         self.test_count += 1
         # Samples past the end of the test set are repeats from its start.
         pad = max(0, start + self.batch_size - n_test)
         return mx.io.DataBatch(data=[data], label=[labels], pad=pad)

 def cub200_iterator(data_path, batch_k, batch_size, data_shape):
     """Build the CUB200-2011 iterators.

     Returns a ``(train_iter, test_iter)`` tuple of CUB200Iter objects that
     share every argument except ``is_train``.
     """
     train_iter = CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=True)
     test_iter = CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=False)
     return train_iter, test_iter
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import os
	import random

	import numpy as np

	import mxnet as mx
	from mxnet import nd

	def transform(data, target_wd, target_ht, is_train, box, crop_size=(224, 224)):
	    """Crop, resize, normalize and augment an image NDArray.

	    Parameters
	    ----------
	    data : NDArray
	        Image indexed as ``data[row, column, channel]``. Values are divided
	        by 255, so 0-255 pixel intensities are assumed (TODO confirm).
	    target_wd : int
	        Width the image is resized to before the final crop.
	    target_ht : int
	        Height the image is resized to before the final crop.
	    is_train : bool
	        If True, flip the image along axis 1 with probability 0.5 and take a
	        random crop. Otherwise take a center crop.
	    box : sequence of four ints or None
	        Bounding box ``(x, y, w, h)`` used to crop the image first. The box
	        is clipped to the image extent. Pass None to skip this crop.
	    crop_size : tuple of (int, int), optional
	        Size handed to ``mx.image.random_crop`` / ``mx.image.center_crop``.
	        Defaults to (224, 224), the value that used to be hard-coded.

	    Returns
	    -------
	    NDArray
	        The transformed image, channel axis first, with a leading axis of
	        length 1 so that results can be concatenated into a batch.
	    """
	    if box is not None:
	        x, y, w, h = box
	        # Clip the far edges so a box that overshoots the image is still valid.
	        row_end = min(y + h, data.shape[0])
	        col_end = min(x + w, data.shape[1])
	        data = data[y:row_end, x:col_end]

	    # Resize to target_wd * target_ht.
	    data = mx.image.imresize(data, target_wd, target_ht)

	    # Normalize in the same way as the pre-trained model.
	    data = data.astype(np.float32) / 255.0
	    mean = mx.nd.array([0.485, 0.456, 0.406])
	    std = mx.nd.array([0.229, 0.224, 0.225])
	    data = (data - mean) / std

	    if is_train:
	        if random.random() < 0.5:
	            data = nd.flip(data, axis=1)
	        data, _ = mx.image.random_crop(data, crop_size)
	    else:
	        data, _ = mx.image.center_crop(data, crop_size)

	    # Move the channel axis to the front: (rows, columns, channels)
	    # becomes (channels, rows, columns).
	    data = nd.transpose(data, (2, 0, 1))

	    # If image is greyscale, repeat 3 times to get RGB image.
	    if data.shape[0] == 1:
	        data = nd.tile(data, (3, 1, 1))
	    return data.reshape((1,) + data.shape)


	class CUB200Iter(mx.io.DataIter):
	    """Iterator for the CUB200-2011 dataset.

	    Parameters
	    ----------
	    data_path : str,
	        The path to dataset directory.
	    batch_k : int,
	        Number of images per class in a batch.
	    batch_size : int,
	        Batch size.
	    data_shape : tuple,
	        Data shape. E.g. (3, 224, 224).
	    is_train : bool,
	        Training data or testing data. Training batches are randomly sampled.
	        Testing batches are loaded sequentially until reaching the end.
	    """

	    # Following "Deep Metric Learning via Lifted Structured Feature Embedding" paper,
	    # we use the first 100 classes for training, and the remaining for testing.
	    NUM_TRAIN_CLASSES = 100

	    def __init__(self, data_path, batch_k, batch_size, data_shape, is_train):
	        """Index image files, labels and bounding boxes found under data_path.

	        Reads ``images.txt``, ``image_class_labels.txt`` and
	        ``bounding_boxes.txt`` in lockstep, one line of each per image.
	        """
	        super(CUB200Iter, self).__init__(batch_size)
	        self.data_shape = (batch_size,) + data_shape
	        self.batch_size = batch_size
	        self.provide_data = [('data', self.data_shape)]
	        self.batch_k = batch_k
	        self.is_train = is_train

	        # One list of file names per training class, indexed by zero-based label.
	        self.train_image_files = [[] for _ in range(self.NUM_TRAIN_CLASSES)]
	        self.test_image_files = []
	        self.test_labels = []
	        # Maps image file name to its [x, y, w, h] bounding box.
	        self.boxes = {}
	        # Number of test batches served since the last reset.
	        self.test_count = 0

	        with open(os.path.join(data_path, 'images.txt'), 'r') as f_img, \
	             open(os.path.join(data_path, 'image_class_labels.txt'), 'r') as f_label, \
	             open(os.path.join(data_path, 'bounding_boxes.txt'), 'r') as f_box:
	            for line_img, line_label, line_box in zip(f_img, f_label, f_box):
	                fname = os.path.join(data_path, 'images', line_img.strip().split()[-1])
	                # Labels in the file are one-based; store them zero-based.
	                label = int(line_label.strip().split()[-1]) - 1
	                # The last four fields are the box; truncate them to ints.
	                box = [int(float(v)) for v in line_box.split()[-4:]]
	                self.boxes[fname] = box

	                if label < self.NUM_TRAIN_CLASSES:
	                    self.train_image_files[label].append(fname)
	                else:
	                    self.test_labels.append(label)
	                    self.test_image_files.append(fname)

	        self.n_test = len(self.test_image_files)

	    def get_image(self, img, is_train):
	        """Load the image file img and transform it using its bounding box."""
	        img_arr = mx.image.imread(img)
	        img_arr = transform(img_arr, 256, 256, is_train, self.boxes[img])
	        return img_arr

	    def sample_train_batch(self):
	        """Sample a training batch (data and label).

	        Draws ``batch_size // batch_k`` distinct training classes and
	        ``batch_k`` distinct images from each, so the batch holds fewer than
	        ``batch_size`` samples when ``batch_size`` is not a multiple of
	        ``batch_k``.

	        Returns
	        -------
	        tuple
	            The concatenated image NDArray and the list of class labels.
	        """
	        batch = []
	        labels = []
	        num_groups = self.batch_size // self.batch_k

	        sampled_classes = np.random.choice(self.NUM_TRAIN_CLASSES, num_groups, replace=False)
	        for cls in sampled_classes:
	            img_fnames = np.random.choice(self.train_image_files[cls],
	                                          self.batch_k, replace=False)
	            batch += [self.get_image(img_fname, is_train=True) for img_fname in img_fnames]
	            labels += [cls] * self.batch_k

	        return nd.concatenate(batch, axis=0), labels

	    def get_test_batch(self):
	        """Load the next sequential testing batch (data and label).

	        Indices wrap around to the start of the test set, so the final batch
	        is always full and repeats leading samples when the test set size is
	        not a multiple of the batch size.
	        """
	        n_test = len(self.test_image_files)
	        start = self.test_count * self.batch_size
	        indices = [(start + i) % n_test for i in range(self.batch_size)]
	        batch = [self.get_image(self.test_image_files[idx], is_train=False)
	                 for idx in indices]
	        labels = [self.test_labels[idx] for idx in indices]
	        return nd.concatenate(batch, axis=0), labels

	    def reset(self):
	        """Reset an iterator."""
	        self.test_count = 0

	    def next(self):
	        """Return a batch.

	        Training iterators never stop. Testing iterators raise StopIteration
	        once every test image has been served, and rewind themselves. Test
	        batches set ``pad`` to the number of trailing wrap-around samples.
	        """
	        if self.is_train:
	            data, labels = self.sample_train_batch()
	            return mx.io.DataBatch(data=[data], label=[labels])

	        n_test = len(self.test_image_files)
	        start = self.test_count * self.batch_size
	        if start >= n_test:
	            self.test_count = 0
	            raise StopIteration

	        data, labels = self.get_test_batch()
	        self.test_count += 1
	        # Samples past the end of the test set are repeats from its start.
	        pad = max(0, start + self.batch_size - n_test)
	        return mx.io.DataBatch(data=[data], label=[labels], pad=pad)

	def cub200_iterator(data_path, batch_k, batch_size, data_shape):
	    """Build the CUB200-2011 iterators.

	    Returns a ``(train_iter, test_iter)`` tuple of CUB200Iter objects that
	    share every argument except ``is_train``.
	    """
	    train_iter = CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=True)
	    test_iter = CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=False)
	    return train_iter, test_iter