Merge pull request #667 from XJDKC/dev
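Fix training bugs: require initialized data in Tensor::AsType, register C as a read block in Mult when beta is nonzero, and adapt tests to lazy block allocation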
diff --git a/src/core/device/device.cc b/src/core/device/device.cc
index 458b944..4114601 100644
--- a/src/core/device/device.cc
+++ b/src/core/device/device.cc
@@ -50,8 +50,6 @@
bool previous_state = graph_enabled_;
graph_enabled_ = false;
- // graph_->Debug();
-
if (serial) {
// sequential execution
graph_->RunInSerial();
@@ -60,6 +58,8 @@
graph_->RunGraph();
}
+ // graph_->Debug();
+
graph_enabled_ = previous_state;
}
diff --git a/src/core/scheduler/scheduler.cc b/src/core/scheduler/scheduler.cc
index deecb68..2f0bb34 100644
--- a/src/core/scheduler/scheduler.cc
+++ b/src/core/scheduler/scheduler.cc
@@ -18,7 +18,10 @@
#include "singa/core/scheduler.h"
+#include <algorithm>
#include <functional>
+#include <iomanip>
+#include <sstream>
#include <thread>
#include <unordered_set>
@@ -99,39 +102,97 @@
}
void Graph::Debug() {
- for (size_t i = 0; i < nodes_.size(); ++i) {
- printf("OP[%2lu]: ", i);
- printf("Inputs: ");
- auto node = nodes_[i];
- for (size_t j = 0; j < node->in_edges_.size(); ++j) {
- printf("%d\t", blocks_[node->in_edges_[j]->blk_]->id_);
- }
- for (size_t j = node->in_edges_.size(); j < 3; ++j) {
- printf("\t");
- }
- printf("Outputs: ");
- for (size_t j = 0; j < node->out_edges_.size(); ++j) {
- printf("%d\t", blocks_[node->out_edges_[j]->blk_]->id_);
- }
- printf("\n");
+ if (dirty_) Analysis();
+
+ size_t max_in_num = 0, max_out_num = 0, max_next_num = 0, max_free_num = 0;
+ for (auto &it : nodes_) {
+ max_in_num = std::max(max_in_num, it->in_edges_.size());
+ max_out_num = std::max(max_out_num, it->out_edges_.size());
}
+ for (auto &it : next_nodes_) {
+ max_next_num = std::max(max_next_num, it.size());
+ }
+
+ for (auto &it : free_blocks_) {
+ max_free_num = std::max(max_free_num, it.size());
+ }
+
+ int w = 2;
+ std::stringstream ss;
+ ss << "begin nodes:[";
+ for (size_t i = 0; i < begin_nodes_.size(); ++i) {
+ ss << begin_nodes_[i]->id_;
+ }
+ ss << "]" << std::endl;
+
+ size_t size = 0;
+ for (size_t i = 0; i < nodes_.size(); ++i) {
+ ss << "OP[" << std::setw(w) << i;
+ auto node = nodes_[i];
+
+ ss << "] Inputs:[";
+ size = node->in_edges_.size();
+ for (size_t j = 0; j < max_in_num; ++j) {
+ if (j < size)
+ ss << std::setw(w) << blocks_[node->in_edges_[j]->blk_]->id_ << " ";
+ else
+ ss << std::setw(w + 1) << " ";
+ }
+
+ ss << "] Outputs:[";
+ size = node->out_edges_.size();
+ for (size_t j = 0; j < max_out_num; ++j) {
+ if (j < size)
+ ss << std::setw(w) << blocks_[node->out_edges_[j]->blk_]->id_ << " ";
+ else
+ ss << std::setw(w + 1) << " ";
+ }
+
+ ss << "] Next nodes:[";
+ size = next_nodes_[i].size();
+ for (size_t j = 0; j < max_next_num; ++j) {
+ if (j < size)
+ ss << std::setw(w) << next_nodes_[i][j]->id_ << " ";
+ else
+ ss << std::setw(w + 1) << " ";
+ }
+
+ ss << "] Free blocks:[";
+ size = free_blocks_[i].size();
+ for (size_t j = 0; j < max_free_num; ++j) {
+ if (j < size)
+ ss << std::setw(w) << blocks_[free_blocks_[i][j]]->id_ << " ";
+ else
+ ss << std::setw(w + 1) << " ";
+ }
+ ss << "]" << std::endl;
+ }
+
+ std::vector<BlkInfo *> blkInfos;
+ blkInfos.resize(blocks_.size());
+
for (auto it : blocks_) {
- auto blkInfo = it.second;
- printf("Block[%2d]: addr[%p] graph_ref[%d] ref_count[%d] ", blkInfo->id_,
- blkInfo->blk_, blkInfo->graph_ref_, it.first->ref_count());
+ blkInfos[it.second->id_] = it.second;
+ }
+
+ for (auto it : blkInfos) {
+ auto blkInfo = it;
+ ss << "Block[" << std::setw(w) << blkInfo->id_ << "] addr[" << std::setw(w)
+ << blkInfo->blk_ << "] graph_ref[" << std::setw(w) << blkInfo->graph_ref_
+ << "] ref_count[" << std::setw(w) << blkInfo->blk_->ref_count() << "] ";
switch (blkInfo->type_) {
case BlockType::kInput:
- printf("type[input] ");
+ ss << "type[input] ";
break;
case BlockType::kParam:
- printf("type[param] ");
+ ss << "type[param] ";
break;
case BlockType::kInter:
- printf("type[inter] ");
+ ss << "type[inter] ";
break;
case BlockType::kEnd:
- printf("type[_end_] ");
+ ss << "type[_end_] ";
break;
default:
break;
@@ -140,14 +201,16 @@
if (blkInfo->write_node_) {
id = blkInfo->write_node_->id_;
}
- printf(" write_node[%2d]", id);
+ ss << " write_node[" << std::setw(w) << id << "]";
id = -1;
if (blkInfo->last_node_) {
id = blkInfo->last_node_->id_;
}
- printf(" last_node[%2d]", id);
- printf("\n");
+ ss << " last_node[" << std::setw(w) << id << "]" << std::endl;
+ ;
}
+
+ printf("%s", ss.str().c_str());
}
void Graph::RunGraph() {
@@ -358,6 +421,8 @@
}
dirty_ = false;
+
+ // Debug();
}
void Graph::FreeLoop() {
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index 0e55776..2dfee71 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -161,6 +161,9 @@
// return new tensor
Tensor Tensor::AsType(const DataType type) {
+ CHECK(block() && block()->initialized() == true)
+ << "the data of the tensor needs be initialized before casting to "
+ "another type";
if (data_type_ != type) {
Tensor &thisRef = *this;
Tensor ret(shape_, device_, type);
@@ -1466,8 +1469,12 @@
template <typename SType>
void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
Tensor *C) {
+ Tensor fakeC;
vector<Block *> read_blocks = {A.block(), B.block()};
- // if (beta) read_blocks.push_back(C->block());
+ if (beta) {
+ fakeC = *C;
+ read_blocks.push_back(C->block());
+ }
if (B.nDim() == 1u) {
CHECK_EQ(A.shape().size(), 2u);
TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, {
@@ -1475,7 +1482,7 @@
auto b = TypeCast<SType, DType>(beta);
Tensor &CRef = *C;
C->device()->Exec(
- [a, A, b, B, CRef](Context *ctx) mutable {
+ [a, A, b, B, CRef, fakeC](Context *ctx) mutable {
GEMV<DType, Lang>(a, A, B, b, &CRef, ctx);
},
read_blocks, {C->block()});
@@ -1488,7 +1495,7 @@
auto b = TypeCast<SType, DType>(beta);
Tensor &CRef = *C;
C->device()->Exec(
- [a, A, b, B, CRef](Context *ctx) mutable {
+ [a, A, b, B, CRef, fakeC](Context *ctx) mutable {
GEMM<DType, Lang>(a, A, B, b, &CRef, ctx);
},
read_blocks, {C->block()});
@@ -1523,7 +1530,7 @@
Tensor &CRef = *C;
C->device()->Exec(
- [a, A_tmp, b, B_tmp, CRef](Context *ctx) mutable {
+ [a, A_tmp, b, B_tmp, CRef, fakeC](Context *ctx) mutable {
GEMMBatched<DType, Lang>(a, A_tmp, B_tmp, b, &CRef, ctx);
},
read_blocks, {C->block()});
diff --git a/test/singa/test_cross_entropy.cc b/test/singa/test_cross_entropy.cc
index cd7e7ba..aa48e6f 100644
--- a/test/singa/test_cross_entropy.cc
+++ b/test/singa/test_cross_entropy.cc
@@ -42,8 +42,10 @@
TEST_F(TestSoftmaxCrossEntropy, CppForward) {
p.CopyDataFromHostPtr(pdat, 8);
- t.AsType(singa::kInt);
+ EXPECT_TRUE(p.block()->initialized());
t.CopyDataFromHostPtr(tdat, 2);
+ t.AsType(singa::kInt);
+
singa::SoftmaxCrossEntropy cross_entropy;
const Tensor& loss = cross_entropy.Forward(singa::kEval, p, t);
@@ -56,8 +58,8 @@
TEST_F(TestSoftmaxCrossEntropy, CppForwardAryTarget) {
p.CopyDataFromHostPtr(pdat, 8);
- ta.AsType(singa::kInt);
ta.CopyDataFromHostPtr(tary, 8);
+ ta.AsType(singa::kInt);
singa::SoftmaxCrossEntropy cross_entropy;
const Tensor& loss = cross_entropy.Forward(singa::kEval, p, ta);
@@ -70,8 +72,8 @@
TEST_F(TestSoftmaxCrossEntropy, CppBackward) {
p.CopyDataFromHostPtr(pdat, 8);
- t.AsType(singa::kInt);
t.CopyDataFromHostPtr(tdat, 2);
+ t.AsType(singa::kInt);
singa::SoftmaxCrossEntropy cross_entropy;
cross_entropy.Forward(singa::kTrain, p, t);
@@ -90,8 +92,8 @@
TEST_F(TestSoftmaxCrossEntropy, CppBackwardAryTarget) {
p.CopyDataFromHostPtr(pdat, 8);
- ta.AsType(singa::kInt);
ta.CopyDataFromHostPtr(tary, 8);
+ ta.AsType(singa::kInt);
singa::SoftmaxCrossEntropy cross_entropy;
cross_entropy.Forward(singa::kTrain, p, ta);
diff --git a/test/singa/test_platform.cc b/test/singa/test_platform.cc
index c38ef37..fce5f34 100644
--- a/test/singa/test_platform.cc
+++ b/test/singa/test_platform.cc
@@ -28,8 +28,10 @@
TEST(Platform, CreateMultDevice) {
int n = Platform::GetNumGPUs();
auto devs = Platform::CreateCudaGPUs(n);
- for (int i = 0; i < devs.size(); i++) {
+ for (size_t i = 0; i < devs.size(); i++) {
auto b = devs[i]->NewBlock(512 + 512 * (2 - i));
+ // for lazy allocation
+ b->mutable_data();
EXPECT_EQ(512 + 512 * (2 - i), devs[i]->GetAllocatedMem());
devs[i]->FreeBlock(b);
}
@@ -54,6 +56,8 @@
size_t size[] = {128, 256, 3, 24};
{
auto ptr = dev->NewBlock(size[0]);
+ // for lazy allocation
+ ptr->mutable_data();
auto allocated = dev->GetAllocatedMem();
EXPECT_LE(size[0], allocated);
dev->FreeBlock(ptr);
@@ -63,9 +67,13 @@
auto ptr0 = dev->NewBlock(size[0]);
auto ptr1 = dev->NewBlock(size[1]);
auto ptr2 = dev->NewBlock(size[2]);
+ ptr0->mutable_data();
+ ptr1->mutable_data();
+ ptr2->mutable_data();
auto allocated = dev->GetAllocatedMem();
EXPECT_LE(size[0] + size[1] + size[2], allocated);
auto ptr3 = dev->NewBlock(size[3]);
+ ptr3->mutable_data();
allocated = dev->GetAllocatedMem();
EXPECT_LE(size[0] + size[1] + size[2] + size[3], allocated);
dev->FreeBlock(ptr0);
diff --git a/test/singa/test_snapshot.cc b/test/singa/test_snapshot.cc
index 43c879c..ab1a69f 100644
--- a/test/singa/test_snapshot.cc
+++ b/test/singa/test_snapshot.cc
@@ -79,8 +79,8 @@
singa::Snapshot int_snapshot_write(prefix + ".int",
singa::Snapshot::kWrite);
singa::Tensor int_param(singa::Shape{4});
- int_param.AsType(singa::kInt);
int_param.CopyDataFromHostPtr(int_data, 4);
+ int_param.AsType(singa::kInt);
int_snapshot_write.Write("IntParam", int_param);
}