/************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/

#include <glog/logging.h>

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <functional>
#include <string>
#include <utility>
#include <vector>

#include "singa/neuralnet/loss_layer.h"
#include "mshadow/tensor.h"
#include "singa/utils/math_blob.h"

namespace singa {
using namespace mshadow;
using mshadow::cpu;
using mshadow::Shape;
using mshadow::Shape1;
using mshadow::Shape2;
using mshadow::Tensor;
using std::vector;

void SoftmaxLossLayer::Setup(const LayerProto& proto,
    const vector<Layer*>& srclayers) {
  CHECK_EQ(srclayers.size(), 2);
  LossLayer::Setup(proto, srclayers);
  data_.Reshape(srclayers[0]->data(this).shape());
  batchsize_ = data_.shape()[0];
  dim_ = data_.count() / batchsize_;
  topk_ = proto.softmaxloss_conf().topk();
  scale_ = proto.softmaxloss_conf().scale();
}
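
// Forward pass: apply softmax to the source feature, then accumulate the
// cross-entropy loss and the top-k accuracy over the mini-batch.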
void SoftmaxLossLayer::ComputeFeature(int flag,
    const vector<Layer*>& srclayers) {
  Shape<2> s = Shape2(batchsize_, dim_);
  Tensor<cpu, 2> prob(data_.mutable_cpu_data(), s);
  Tensor<cpu, 2> src(srclayers[0]->mutable_data(this)->mutable_cpu_data(), s);
  Softmax(prob, src);
  const auto& label = srclayers[1]->aux_data(this);
  const float* probptr = prob.dptr;
  float loss = 0, precision = 0;
  for (int n = 0; n < batchsize_; n++) {
    int ilabel = static_cast<int>(label[n]);
    CHECK_GE(ilabel, 0);
    CHECK_LT(ilabel, dim_);
    // cross-entropy loss of the ground-truth class, clamped to avoid log(0)
    float prob_of_truth = probptr[ilabel];
    loss -= log(std::max(prob_of_truth, FLT_MIN));
    // rank all classes by predicted probability
    vector<std::pair<float, int> > probvec;
    for (int j = 0; j < dim_; ++j) {
      probvec.push_back(std::make_pair(probptr[j], j));
    }
    std::partial_sort(probvec.begin(), probvec.begin() + topk_, probvec.end(),
                      std::greater<std::pair<float, int> >());
    // check if the true label is within the top-k predictions
    for (int k = 0; k < topk_; k++) {
      if (probvec[k].second == ilabel) {
        precision++;
        break;
      }
    }
    probptr += dim_;
  }
  CHECK_EQ(probptr, prob.dptr + prob.shape.Size());
  loss_ += loss * scale_ / (1.0f * batchsize_);
  accuracy_ += precision * scale_ / (1.0f * batchsize_);
  counter_++;
}
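
// Backward pass: the gradient of the softmax cross-entropy loss w.r.t. the
// layer input is the predicted distribution minus the one-hot ground-truth
// vector (p - y), scaled by scale_ / batchsize_.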
void SoftmaxLossLayer::ComputeGradient(int flag,
    const vector<Layer*>& srclayers) {
  const auto& label = srclayers[1]->aux_data(this);
  Blob<float>* gsrcblob = srclayers[0]->mutable_grad(this);
  // start from the softmax output, then subtract 1 at the ground-truth index
  Copy(data_, gsrcblob);
  float* gsrcptr = gsrcblob->mutable_cpu_data();
  for (int n = 0; n < batchsize_; n++) {
    gsrcptr[n * dim_ + static_cast<int>(label[n])] -= 1.0f;
  }
  Tensor<cpu, 1> gsrc(gsrcptr, Shape1(gsrcblob->count()));
  gsrc *= scale_ / (1.0f * batchsize_);
}

const std::string SoftmaxLossLayer::ToString(bool debug, int flag) {
  if (debug)
    return Layer::ToString(debug, flag);
  // report the average loss and accuracy since the last reset, then reset
  std::string disp = "Loss = " + std::to_string(loss_ / counter_)
      + ", accuracy = " + std::to_string(accuracy_ / counter_);
  counter_ = 0;
  loss_ = accuracy_ = 0;
  return disp;
}
} // namespace singa