| /************************************************************ |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *************************************************************/ |
| |
| #include "singa/model/feed_forward_net.h" |
| #include "singa/model/initializer.h" |
| #include "singa/utils/logging.h" |
| #include "singa/utils/channel.h" |
| namespace singa { |
| |
| FeedForwardNet::~FeedForwardNet() { |
| } |
| |
| std::shared_ptr<Layer> FeedForwardNet::Add(std::shared_ptr<Layer> layer) { |
| layers_.push_back(layer); |
| return layer; |
| } |
| |
| std::shared_ptr<Layer> FeedForwardNet::Add(const LayerConf& conf, |
| const Shape* sample_shape) { |
| std::shared_ptr<Layer> layer(CreateLayer(conf.type())); |
| CHECK(conf.has_name()) << "Must set layer name"; |
| if (sample_shape == nullptr) |
| layer->Setup(layers_.back()->GetOutputSampleShape(), conf); |
| else |
| layer->Setup(*sample_shape, conf); |
| Add(layer); |
| LOG(INFO) << layer->name() << VecToStr(layer->GetOutputSampleShape()); |
| return layer; |
| } |
| |
| const vector<string> FeedForwardNet::GetParamNames() const { |
| vector<string> names; |
| for (auto layer : layers_) |
| for (const auto name : layer->param_names()) names.push_back(name); |
| return names; |
| } |
| const vector<Tensor> FeedForwardNet::GetParamValues() const { |
| vector<Tensor> values; |
| for (auto layer : layers_) |
| for (const auto value : layer->param_values()) values.push_back(value); |
| return values; |
| } |
| |
| const vector<ParamSpec> FeedForwardNet::GetParamSpecs() const { |
| vector<ParamSpec> specs; |
| for (auto layer : layers_) |
| for (const auto spec : layer->param_specs()) specs.push_back(spec); |
| return specs; |
| } |
| |
| void FeedForwardNet::Compile(bool shuffle, Optimizer* opt, Loss* loss, |
| Metric* metric) { |
| std::shared_ptr<Updater> updater = std::make_shared<Updater>(opt); |
| Compile(shuffle, true, updater, loss, metric); |
| } |
| |
| void FeedForwardNet::Compile(bool shuffle, bool to_register, |
| std::shared_ptr<Updater> updater, Loss* loss, |
| Metric* metric) { |
| shuffle_ = shuffle; |
| bool train = (updater != nullptr) && (loss != nullptr); |
| bool test = metric != nullptr; |
| CHECK(train || test) << "Must set updater and loss, or set metric"; |
| updater_ = updater; |
| loss_ = loss; |
| metric_ = metric; |
| const auto specs = GetParamSpecs(); |
| auto params = GetParamValues(); |
| CHECK_EQ(specs.size(), params.size()); |
| for (size_t k = 0; k < specs.size(); k++) { |
| if (to_register) { |
| updater_->Register(specs[k].name(), specs[k]); |
| } |
| auto init = CreateInitializer(specs[k].filler()); |
| init->Fill(params[k]); |
| LOG(INFO) << specs[k].name() << " : " << params[k].L1(); |
| } |
| } |
| |
| void FeedForwardNet::ToDevice(std::shared_ptr<Device> device) { |
| for (auto layer : layers_) layer->ToDevice(device); |
| /* |
| opt_->ToDevice(device); |
| loss_->ToDevice(device); |
| metric_->ToDevice(device); |
| */ |
| } |
| |
| FeedForwardNet FeedForwardNet::Clone(std::shared_ptr<Device> device) { |
| FeedForwardNet net; |
| /* |
| for (auto layer: layers_) |
| net.layers_.push_back(layer->CloneTo(device)); |
| if (opt_ != nullptr) |
| net.opt_ = opt_->CloneTo(device); |
| if (loss_ != nullptr) |
| net.loss_ = loss_.CloneTo(device); |
| if (metric_ != nullptr) |
| net.metric_ = metric_->CloneTo(device); |
| net.shuffle_ = shuffle_; |
| net.device_ = device; |
| net.dtype_ = dtype; |
| */ |
| return net; |
| } |
| |
| void FeedForwardNet::AsType(DataType dtype) { |
| LOG(FATAL) << "FeedForwardNet::AsType not implemented"; |
| } |
| |
| void FeedForwardNet::Train(size_t batchsize, int nb_epoch, const Tensor& x, |
| const Tensor& y, float val_split) { |
| CHECK_EQ(x.shape(0), y.shape(0)) << "Diff num of sampels in x and y"; |
| size_t num_train = (size_t) (x.shape(0) * val_split); |
| if (val_split == 0.0f) { |
| Tensor dummy; |
| Train(batchsize, nb_epoch, x, y, dummy, dummy); |
| } else { |
| const Tensor train_x = CopyRows(x, 0, num_train); |
| const Tensor train_y = CopyRows(y, 0, num_train); |
| const Tensor test_x = CopyRows(x, num_train, x.shape(0)); |
| const Tensor test_y = CopyRows(y, num_train, y.shape(0)); |
| Train(batchsize, nb_epoch, train_x, train_y, test_x, test_y); |
| } |
| } |
| |
| void FeedForwardNet::Train(size_t batchsize, int nb_epoch, const Tensor& x, |
| const Tensor& y, const Tensor& val_x, |
| const Tensor& val_y) { |
| CHECK_EQ(x.shape(0), y.shape(0)) << "Diff num of sampels in x and y"; |
| int num_extra_samples = (int)x.shape(0) % batchsize; |
| if (num_extra_samples != 0) |
| LOG(WARNING) << "Pls set batchsize to make num_total_samples " |
| << "% batchsize == 0. Otherwise, the last " |
| << num_extra_samples << " samples would not be used"; |
| Channel* train_ch = GetChannel("train_perf"); |
| train_ch->EnableDestStderr(true); |
| Channel* val_ch = GetChannel("val_perf"); |
| val_ch->EnableDestStderr(true); |
| std::vector<size_t> index; |
| for (size_t i = 0; i < x.shape(0) / batchsize; i++) index.push_back(i); |
| for (int epoch = 0; epoch < nb_epoch; epoch++) { |
| if (shuffle_) std::random_shuffle(index.begin(), index.end()); |
| float loss = 0.0f, metric = 0.0f; |
| size_t b = 0; |
| for (; b < x.shape(0) / batchsize; b++) { |
| size_t idx = index[b]; |
| const Tensor bx = CopyRows(x, idx * batchsize, (idx + 1) * batchsize); |
| const Tensor by = CopyRows(y, idx * batchsize, (idx + 1) * batchsize); |
| const auto ret = TrainOnBatch(epoch, bx, by); |
| loss += ret.first; |
| metric += ret.second; |
| } |
| if (val_x.Size() == 0) continue; |
| loss /= b; |
| metric /= b; |
| train_ch->Send( |
| "Epoch " + std::to_string(epoch) + ", training loss = " + |
| std::to_string(loss) + ", accuracy = " + std::to_string(metric) + |
| ", lr = " + |
| std::to_string(updater_->GetOptimizer()->GetLearningRate(epoch))); |
| if (val_x.Size() && val_y.Size()) { |
| const auto val_perf = Evaluate(val_x, val_y, batchsize); |
| val_ch->Send("Epoch " + std::to_string(epoch) + ", val loss = " + |
| std::to_string(Sum(val_perf.first) / val_y.Size()) + |
| ", metric = " + |
| std::to_string(Sum(val_perf.second) / val_y.Size())); |
| } |
| } |
| } |
| |
| const std::pair<float, float> FeedForwardNet::TrainOnBatch(int epoch, |
| const Tensor& x, |
| const Tensor& y) { |
| int flag = kTrain; |
| const Tensor fea = Forward(flag, x); |
| float loss = loss_->Evaluate(flag, fea, y); |
| float metric = metric_->Evaluate(fea, y); |
| const Tensor grad = loss_->Backward(); |
| auto grads = Backward(kTrain, grad / static_cast<float>(x.shape(0))); |
| auto names = GetParamNames(); |
| auto values = GetParamValues(); |
| for (size_t k = 0; k < grads.size(); k++) { |
| updater_->Apply(epoch, names[k], grads[k], values.at(k)); |
| } |
| return std::make_pair(loss, metric); |
| } |
| |
| const Tensor FeedForwardNet::Forward(int flag, const Tensor& data) { |
| Tensor input = data, output; |
| // LOG(INFO) << data.L1(); |
| for (auto layer : layers_) { |
| output = layer->Forward(flag, input); |
| // LOG(INFO) << layer->name() << ": " << output.L2(); |
| input = output; |
| } |
| return output; |
| } |
| |
| const vector<Tensor> FeedForwardNet::Backward(int flag, const Tensor& grad) { |
| vector<Tensor> param_grads; |
| std::stack<Tensor> buf; |
| Tensor tmp = grad; |
| for (int i = (int)layers_.size() - 1; i >= 0; i--) { |
| // LOG(INFO) << layers_.at(i)->name() << " : " << tmp.L1(); |
| auto ret = layers_.at(i)->Backward(flag, tmp); |
| tmp = ret.first; |
| if (ret.second.size()) { |
| for (int k = (int)ret.second.size() - 1; k >= 0; k--) { |
| buf.push(ret.second[k]); |
| // LOG(INFO) << " " << buf.top().L1(); |
| } |
| } |
| } |
| while (!buf.empty()) { |
| param_grads.push_back(buf.top()); |
| buf.pop(); |
| } |
| return param_grads; |
| } |
| |
| std::pair<Tensor, Tensor> FeedForwardNet::Evaluate(const Tensor& x, |
| const Tensor& y, |
| size_t batchsize) { |
| CHECK_EQ(x.shape(0), y.shape(0)) << "Diff num of sampels in x and y"; |
| CHECK_GE(x.shape(0), batchsize); |
| int num_extra_samples = (int)x.shape(0) % batchsize; |
| Tensor loss(Shape{x.shape(0)}), metric(Shape{x.shape(0)}); |
| for (size_t b = 0; b < x.shape(0) / batchsize; b++) { |
| int start = (int)(b * batchsize), end = (int)(start + batchsize); |
| const Tensor bx = CopyRows(x, start, end); |
| const Tensor by = CopyRows(y, start, end); |
| const auto ret = EvaluateOnBatch(bx, by); |
| CopyDataToFrom(&loss, ret.first, batchsize, start, 0); |
| CopyDataToFrom(&metric, ret.second, batchsize, start, 0); |
| } |
| { |
| int start = (int)(x.shape(0) - batchsize), end = (int)x.shape(0); |
| const Tensor bx = CopyRows(x, start, end); |
| const Tensor by = CopyRows(y, start, end); |
| const auto ret = EvaluateOnBatch(bx, by); |
| int dst_offset = (int)(x.shape(0) - num_extra_samples); |
| int src_offset = (int)(batchsize - num_extra_samples); |
| CopyDataToFrom(&loss, ret.first, num_extra_samples, dst_offset, src_offset); |
| CopyDataToFrom(&metric, ret.second, num_extra_samples, dst_offset, |
| src_offset); |
| } |
| return std::make_pair(loss, metric); |
| } |
| |
| std::pair<Tensor, Tensor> FeedForwardNet::EvaluateOnBatch(const Tensor& x, |
| const Tensor& y) { |
| int flag = kEval; |
| const Tensor fea = Forward(flag, x); |
| const Tensor l = loss_->Forward(flag, fea, y); |
| const Tensor m = metric_->Forward(fea, y); |
| return std::make_pair(l, m); |
| } |
| |
| const Tensor FeedForwardNet::Predict(const Tensor& x, size_t batchsize) { |
| CHECK_GE(x.shape(0), batchsize); |
| int num_extra_samples = (int)(x.shape(0) % batchsize); |
| const auto outshape = layers_.back()->GetOutputSampleShape(); |
| Tensor y(Shape{x.shape(0), Product(outshape)}, x.device()); |
| for (size_t b = 0; b < x.shape(0) / batchsize; b++) { |
| int start = (int)(b * batchsize), end = (int)(start + batchsize); |
| const Tensor bx = CopyRows(x, start, end); |
| CopyDataToFrom(&y, PredictOnBatch(bx), batchsize * y.shape(1), |
| start * y.shape(1), 0); |
| } |
| if (num_extra_samples > 0) { |
| int start = (int)(x.shape(0) - batchsize), end = (int)(x.shape(0)); |
| const Tensor bx = CopyRows(x, start, end); |
| CopyDataToFrom(&y, PredictOnBatch(bx), num_extra_samples * y.shape(1), |
| (x.shape(0) - num_extra_samples) * y.shape(1), |
| (batchsize - num_extra_samples) * y.shape(1)); |
| } |
| return y; |
| } |
| |
| const Tensor FeedForwardNet::PredictOnBatch(const Tensor& x) { |
| return Forward(kEval, x); |
| } |
| } // namespace singa |