/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
// this implements a simple two layer neural net
#include <vector>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
// header file to use mshadow
#include "mshadow/tensor.h"
// helper function to load mnist dataset
#include "util.h"
// this namespace contains all data structures and functions
using namespace mshadow;
// this namespace contains all operator overloads
using namespace mshadow::expr;
// define sigmoid operation
struct sigmoid{
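// MSHADOW_XINLINE marks Map as inline (and __device__ __host__ when compiled
// with CUDA), so the same elementwise mapper runs on both cpu and gpu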
MSHADOW_XINLINE static real_t Map(real_t a) {
return 1.0f/(1.0f+expf(-a));
}
};
/*! \brief interface for nnet; the interface allows us to use the GPU/CPU implementation in a unified way */
class INNet{
public:
virtual void Forward(const Tensor<cpu, 2, real_t>& inbatch, Tensor<cpu, 2, real_t> &oubatch) = 0;
virtual void Backprop(const Tensor<cpu, 2, real_t>& gradout) = 0;
virtual void Update(void) = 0;
virtual ~INNet() {}
};
/*!
* \brief simple two layer neural net
* this implementation is device invariant
*/
template<typename xpu>
class NNet : public INNet {
public:
// initialize the network
NNet(int batch_size, int num_in, int num_hidden, int num_out) : rnd(0) {
// setup stream
Stream<xpu> *stream = NewStream<xpu>();
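// attach the stream to every container before use,
// so that all expressions on these tensors are evaluated on it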
ninput.set_stream(stream);
nhidden.set_stream(stream);
nhiddenbak.set_stream(stream);
nout.set_stream(stream);
hbias.set_stream(stream);
g_hbias.set_stream(stream);
g_obias.set_stream(stream);
obias.set_stream(stream);
Wi2h.set_stream(stream);
Wh2o.set_stream(stream);
g_Wi2h.set_stream(stream);
g_Wh2o.set_stream(stream);
// setup nodes
ninput.Resize(Shape2(batch_size, num_in));
nhidden.Resize(Shape2(batch_size, num_hidden));
nhiddenbak.Resize(nhidden.shape_);
nout.Resize(Shape2(batch_size, num_out));
// setup bias
hbias.Resize(Shape1(num_hidden)); g_hbias.Resize(hbias.shape_);
obias.Resize(Shape1(num_out)); g_obias.Resize(obias.shape_);
hbias = 0.0f; obias = 0.0f;
// setup weights
Wi2h.Resize(Shape2(num_in, num_hidden)); g_Wi2h.Resize(Wi2h.shape_);
Wh2o.Resize(Shape2(num_hidden, num_out)); g_Wh2o.Resize(Wh2o.shape_);
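// initialize weights with small gaussian noise (mean 0, stddev 0.01) to break symmetry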
rnd.SampleGaussian(&Wi2h, 0, 0.01f);
rnd.SampleGaussian(&Wh2o, 0, 0.01f);
}
virtual ~NNet() {}
// forward propagation
virtual void Forward(const Tensor<cpu, 2, real_t>& inbatch,
Tensor<cpu, 2, real_t> &oubatch) {
// size follows the same convention as numpy
index_t batch_size = inbatch.size(0);
// copy data to input layer
Copy(ninput, inbatch, ninput.stream_);
// first layer: fully connected (fullc)
nhidden = dot(ninput, Wi2h);
nhidden += repmat(hbias, batch_size);
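// repmat replicates the 1-d bias vector into a (batch_size, num_hidden) matrix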
// sigmoid activation; back up the activation into nhiddenbak
nhidden = F<sigmoid>(nhidden);
Copy(nhiddenbak, nhidden, nhiddenbak.stream_);
// second layer: fully connected (fullc)
nout = dot(nhiddenbak, Wh2o);
nout += repmat(obias, batch_size);
// softmax: normalize each row of nout into a probability distribution
Softmax(nout, nout);
// copy result out
Copy(oubatch, nout, nout.stream_);
}
// back propagation
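// given softmax output p and one-hot label y, the caller passes in
//   dL/dnout  = p - y                      (see the training loop in main)
// this routine then computes
//   dL/dWh2o  = nhiddenbak^T * dL/dnout,   dL/dobias = sum_rows(dL/dnout)
//   dL/dh     = dL/dnout * Wh2o^T
//   dL/dpre_h = dL/dh * h * (1 - h)        (sigmoid derivative)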
virtual void Backprop(const Tensor<cpu, 2, real_t>& gradout) {
// copy gradient to output layer
Copy(nout, gradout, nout.stream_);
// calc grad of layer 2
g_obias = sum_rows(nout);
g_Wh2o = dot(nhiddenbak.T(), nout);
// backprop to layer 1
nhiddenbak = dot(nout, Wh2o.T());
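// nhiddenbak is reused as scratch space: it now holds dL/dh, the gradient w.r.t. the sigmoid output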
// calculate gradient of sigmoid layer
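// sigma'(x) = sigma(x) * (1 - sigma(x)), and nhidden still holds sigma(x) from the forward pass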
nhidden = nhidden * (1.0f-nhidden) * nhiddenbak;
// calc grad of layer 1
g_hbias = sum_rows(nhidden);
g_Wi2h = dot(ninput.T(), nhidden);
}
// update weight
virtual void Update(void) {
// run minibatch SGD
const float eta = 0.8f;    // learning rate
const float wd = 0.00001f; // weight decay
// update weight
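// w <- w - eta * (wd * w + dL/dw): plain SGD with L2 weight decay folded into the gradient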
Wi2h -= eta * (wd * Wi2h + g_Wi2h);
Wh2o -= eta * (wd * Wh2o + g_Wh2o);
// no regularization for bias
hbias -= eta * g_hbias;
obias -= eta * g_obias;
}
private:
// random number generator
Random<xpu, real_t> rnd;
// nodes in neural net
TensorContainer<xpu, 2, real_t> ninput, nhidden, nhiddenbak, nout;
// hidden and output biases, and their gradients
TensorContainer<xpu, 1, real_t> hbias, obias, g_hbias, g_obias;
// weights and weight gradients
TensorContainer<xpu, 2, real_t> Wi2h, Wh2o, g_Wi2h, g_Wh2o;
};
// helper function to get the max index
inline int MaxIndex(Tensor<cpu, 1, real_t> pred) {
int maxidx = 0;
for(index_t i = 1; i < pred.size(0); ++i) {
if(pred[i] > pred[maxidx]) maxidx = (int)i;
}
return maxidx;
}
int main(int argc, char *argv[]) {
if(argc < 2) {
printf("Usage: cpu or gpu\n"); return 0;
}
srand(0);
// settings
int batch_size = 100;
int num_in = 28 * 28;
int num_hidden = 100;
int num_out = 10;
// choose which version to use
INNet *net;
if (!strcmp(argv[1], "gpu")) {
InitTensorEngine<gpu>();
net = new NNet<gpu>(batch_size, num_in, num_hidden, num_out);
} else {
InitTensorEngine<cpu>();
net = new NNet<cpu>(batch_size, num_in, num_hidden, num_out);
}
// temporary output buffer on cpu; Forward copies the prediction into it
TensorContainer<cpu, 2, real_t> pred;
pred.Resize(Shape2(batch_size, num_out));
// label
std::vector<int> ytrain, ytest;
// data
TensorContainer<cpu, 2, real_t> xtrain, xtest;
LoadMNIST("train-images-idx3-ubyte", "train-labels-idx1-ubyte", ytrain, xtrain, true);
LoadMNIST("t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte", ytest, xtest, false);
int num_iter = 20;
for (int i = 0; i < num_iter; ++ i) {
// training
for (index_t j = 0; j + batch_size <= xtrain.size(0); j += batch_size) {
net->Forward(xtrain.Slice(j, j + batch_size), pred);
// set gradient into pred
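// pred currently holds softmax probabilities; subtracting 1 at the true label
// turns row k into the cross-entropy gradient p - onehot(ytrain[j+k])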
for (int k = 0; k < batch_size; ++ k) {
pred[k][ ytrain[k+j] ] -= 1.0f;
}
// scale gradient by batch size
pred *= 1.0f / batch_size;
// run backprop
net->Backprop(pred);
// update net parameters
net->Update();
}
// evaluation
long nerr = 0;
for (index_t j = 0; j + batch_size <= xtest.size(0); j += batch_size) {
net->Forward(xtest.Slice(j, j + batch_size), pred);
for (int k = 0; k < batch_size; ++ k) {
nerr += MaxIndex(pred[k]) != ytest[j+k];
}
}
printf("round %d: test-err=%f\n", i, (float)nerr/xtest.size(0));
}
delete net;
if (!strcmp(argv[1], "gpu")) {
ShutdownTensorEngine<gpu>();
} else {
ShutdownTensorEngine<cpu>();
}
return 0;
}