IMPALA-10527: Fix DiskIoMgrTest.WriteToRemotePartialFileSuccess failed in tsan build
Fixed a data race issue when running testcase
DiskIoMgrTest.WriteToRemotePartialFileSuccess in the tsan build. The
cause is the TmpFileBufferPool is not released gracefully while
destruction.
Tests:
Reran all testcases of DiskIoMgrTest with tsan and asan build.
Change-Id: I6b0435bf0ee580acb5553527c0ed4f3aa806707f
Reviewed-on: http://gerrit.cloudera.org:8080/17100
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
diff --git a/be/src/runtime/tmp-file-mgr-internal.h b/be/src/runtime/tmp-file-mgr-internal.h
index bd75239..1df4fe4 100644
--- a/be/src/runtime/tmp-file-mgr-internal.h
+++ b/be/src/runtime/tmp-file-mgr-internal.h
@@ -340,6 +340,9 @@
// from the available pool and make room for other files' buffer.
Status DequeueTmpFilesPool(std::shared_ptr<TmpFile>* tmp_file, bool quick_return);
+ // Shut down the pool before destruction.
+ void ShutDown();
+
private:
friend class TmpFileMgr;
friend class TmpFileMgrTest;
diff --git a/be/src/runtime/tmp-file-mgr.cc b/be/src/runtime/tmp-file-mgr.cc
index 676f646..cd00f42 100644
--- a/be/src/runtime/tmp-file-mgr.cc
+++ b/be/src/runtime/tmp-file-mgr.cc
@@ -168,7 +168,12 @@
TmpFileMgr::TmpFileMgr() {}
-TmpFileMgr::~TmpFileMgr() {}
+TmpFileMgr::~TmpFileMgr() {
+ if(tmp_dirs_remote_ctrl_.tmp_file_pool_ != nullptr) {
+ tmp_dirs_remote_ctrl_.tmp_file_pool_->ShutDown();
+ tmp_dirs_remote_ctrl_.tmp_file_mgr_thread_group_.JoinAll();
+ }
+}
Status TmpFileMgr::Init(MetricGroup* metrics) {
return InitCustom(FLAGS_scratch_dirs, !FLAGS_allow_multiple_scratch_dirs_per_device,
@@ -321,7 +326,7 @@
bool is_s3a_path = IsS3APath(tmp_path.c_str(), false);
if (is_hdfs_path || is_s3a_path) {
// Only support one remote dir.
- if (tmp_dirs_remote_ != nullptr) {
+ if (HasRemoteDir()) {
LOG(WARNING) << "Only one remote directory is supported. Directory "
<< tmp_path.c_str() << " is abandoned.";
continue;
@@ -379,7 +384,7 @@
std::sort(tmp_dirs_.begin(), tmp_dirs_.end(),
[](const TmpDir& a, const TmpDir& b) { return a.priority < b.priority; });
- if (tmp_dirs_remote_ != nullptr) {
+ if (HasRemoteDir()) {
if (local_buff_dir_ == nullptr) {
// Should at least have one local dir for the buffer. Later we might allow to use
// s3 fast upload directly without a buffer.
@@ -400,15 +405,15 @@
metrics->AddGauge(TMP_FILE_MGR_ACTIVE_SCRATCH_DIRS, 0);
active_scratch_dirs_metric_ = SetMetric<string>::CreateAndRegister(
metrics, TMP_FILE_MGR_ACTIVE_SCRATCH_DIRS_LIST, set<string>());
- if (tmp_dirs_remote_ == nullptr) {
- num_active_scratch_dirs_metric_->SetValue(tmp_dirs_.size());
- } else {
+ if (HasRemoteDir()) {
num_active_scratch_dirs_metric_->SetValue(tmp_dirs_.size() + 1);
+ } else {
+ num_active_scratch_dirs_metric_->SetValue(tmp_dirs_.size());
}
for (int i = 0; i < tmp_dirs_.size(); ++i) {
active_scratch_dirs_metric_->Add(tmp_dirs_[i].path);
}
- if (tmp_dirs_remote_ != nullptr) {
+ if (HasRemoteDir()) {
active_scratch_dirs_metric_->Add(tmp_dirs_remote_->path);
RETURN_IF_ERROR(CreateTmpFileBufferPoolThread(metrics));
}
@@ -1740,6 +1745,10 @@
}
TmpFileBufferPool::~TmpFileBufferPool() {
+ DCHECK(shut_down_);
+}
+
+void TmpFileBufferPool::ShutDown() {
{
unique_lock<mutex> l(lock_);
shut_down_ = true;