/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * Xin Li yakumolx@gmail.com
 * This file tests whether the score (accuracy) obtained with the MLP model
 * is better than a given threshold.
 * Run it as: build/test_score 0.75
 * where 0.75 is the threshold score.
 * It returns 0 if the achieved score is higher than the threshold, otherwise 1.
 */
#include <chrono>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "utils.h"
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

Symbol mlp(const std::vector<int> &layers) {
  auto x = Symbol::Variable("X");
  auto label = Symbol::Variable("label");

  std::vector<Symbol> weights(layers.size());
  std::vector<Symbol> biases(layers.size());
  std::vector<Symbol> outputs(layers.size());

  for (size_t i = 0; i < layers.size(); ++i) {
    weights[i] = Symbol::Variable("w" + std::to_string(i));
    biases[i] = Symbol::Variable("b" + std::to_string(i));
    Symbol fc = FullyConnected(
        i == 0 ? x : outputs[i - 1],  // data
        weights[i],
        biases[i],
        layers[i]);
    // Apply ReLU to every hidden layer; the last layer's raw activations
    // feed directly into the softmax output.
    outputs[i] = i == layers.size() - 1 ? fc : Activation(fc, ActivationActType::kRelu);
  }

  return SoftmaxOutput(outputs.back(), label);
}
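// With layers = {128, 64, 10} and 28x28 MNIST images flattened to 784 values (as set
// up in main below), mlp() builds a 784 -> 128 -> 64 -> 10 fully connected network
// with ReLU activations on the hidden layers and a softmax cross-entropy output.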

int main(int argc, char** argv) {
  if (argc < 2) {
    LG << "Usage: " << argv[0] << " <min_score>";
    return 1;
  }
  const float MIN_SCORE = std::stof(argv[1]);

  const int image_size = 28;
  const std::vector<int> layers{128, 64, 10};
  const int batch_size = 100;
  const int max_epoch = 10;
  const float learning_rate = 0.1;
  const float weight_decay = 1e-2;
  float score = 0;

  std::vector<std::string> data_files = {"./data/mnist_data/train-images-idx3-ubyte",
                                         "./data/mnist_data/train-labels-idx1-ubyte",
                                         "./data/mnist_data/t10k-images-idx3-ubyte",
                                         "./data/mnist_data/t10k-labels-idx1-ubyte"};

  auto train_iter = MXDataIter("MNISTIter");
  if (!setDataIter(&train_iter, "Train", data_files, batch_size)) {
    return 1;
  }

  auto val_iter = MXDataIter("MNISTIter");
  if (!setDataIter(&val_iter, "Label", data_files, batch_size)) {
    return 1;
  }
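  // Note: setDataIter() comes from this example's utils.h; as used above, "Train" is
  // assumed to select the training images/labels and "Label" the t10k test files,
  // which serve as the validation set here.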

  TRY
  auto net = mlp(layers);

  Context ctx = Context::gpu();  // Use GPU for training
#if MXNET_USE_CPU
  ctx = Context::cpu();          // Fall back to CPU when MXNET_USE_CPU is defined
#endif

  std::map<std::string, NDArray> args;
  args["X"] = NDArray(Shape(batch_size, image_size * image_size), ctx);
  args["label"] = NDArray(Shape(batch_size), ctx);
  // Let MXNet infer the shapes of the other parameters, such as weights
  net.InferArgsMap(ctx, &args, args);

  // Initialize all parameters with a uniform distribution U(-0.01, 0.01)
  auto initializer = Uniform(0.01);
  for (auto &arg : args) {
    // arg.first is the parameter name, arg.second is its value
    initializer(arg.first, &arg.second);
  }

  // Create the SGD optimizer
  Optimizer* opt = OptimizerRegistry::Find("sgd");
  opt->SetParam("rescale_grad", 1.0 / batch_size)
     ->SetParam("lr", learning_rate)
     ->SetParam("wd", weight_decay);
  std::unique_ptr<LRScheduler> lr_sch(new FactorScheduler(5000, 0.1));
  opt->SetLRScheduler(std::move(lr_sch));
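  // rescale_grad of 1/batch_size averages the summed gradients over the batch; the
  // FactorScheduler above is expected to multiply the learning rate by 0.1 every
  // 5000 optimizer updates.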

  // Create an executor by binding parameters to the model
  auto *exec = net.SimpleBind(ctx, args);
  auto arg_names = net.ListArguments();

  // Start training
  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();

    auto tic = std::chrono::system_clock::now();
    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();
      // Data provided by the DataIter live in CPU memory; copy them to the training context first.
      data_batch.data.CopyTo(&args["X"]);
      data_batch.label.CopyTo(&args["label"]);
      // CopyTo runs asynchronously, so wait for it to complete before using the data.
      NDArray::WaitAll();

      // Compute gradients
      exec->Forward(true);
      exec->Backward();
      // Update parameters, skipping the input data and label (they are not learnable)
      for (size_t i = 0; i < arg_names.size(); ++i) {
        if (arg_names[i] == "X" || arg_names[i] == "label") continue;
        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      }
    }
    auto toc = std::chrono::system_clock::now();

    Accuracy acc;
    val_iter.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();
      data_batch.data.CopyTo(&args["X"]);
      data_batch.label.CopyTo(&args["label"]);
      NDArray::WaitAll();
      // A forward pass is enough, since no gradients are needed for evaluation
      exec->Forward(false);
      acc.Update(data_batch.label, exec->outputs[0]);
    }
    float duration = std::chrono::duration_cast<std::chrono::milliseconds>(toc - tic).count() / 1000.0;
    LG << "Epoch: " << iter << " " << samples / duration
       << " samples/sec Accuracy: " << acc.Get();
    score = acc.Get();  // Keep the validation accuracy of the most recent epoch
  }

  delete exec;
  delete opt;
  MXNotifyShutdown();
  CATCH
  return score >= MIN_SCORE ? 0 : 1;
}