be/src/service/control-service.cc - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include "service/control-service.h"

 #include "common/constant-strings.h"
 #include "common/thread-debug-info.h"
 #include "exec/kudu-util.h"
 #include "kudu/rpc/rpc_context.h"
 #include "kudu/rpc/rpc_controller.h"
 #include "rpc/rpc-mgr.h"
 #include "rpc/rpc-mgr.inline.h"
 #include "runtime/coordinator.h"
 #include "runtime/exec-env.h"
 #include "runtime/mem-tracker.h"
 #include "runtime/query-exec-mgr.h"
 #include "runtime/query-state.h"
 #include "service/client-request-state.h"
 #include "service/impala-server.h"
 #include "util/debug-util.h"
 #include "util/memory-metrics.h"
 #include "util/parse-util.h"
 #include "util/uid-util.h"

 #include "gen-cpp/ImpalaInternalService_types.h"
 #include "gen-cpp/RuntimeProfile_types.h"
 #include "gen-cpp/control_service.pb.h"
 #include "gen-cpp/control_service.proxy.h"

 #include "common/names.h"

 using kudu::rpc::RpcContext;

 // Help text for --control_service_queue_mem_limit: a fixed prefix followed by
 // MEM_UNITS_HELP_MSG formatted with "the process memory limit". Defined ahead of the
 // DEFINE_string() below because that macro reads it through c_str().
 static const string QUEUE_LIMIT_MSG = "(Advanced) Limit on RPC payloads consumption for "
     "ControlService. " + Substitute(MEM_UNITS_HELP_MSG, "the process memory limit");
 // Memory limit spec for the control service queue (default "50MB"). The ControlService
 // constructor parses it with ParseUtil::ParseMemSpec().
 DEFINE_string(control_service_queue_mem_limit, "50MB", QUEUE_LIMIT_MSG.c_str());
 // Thread count for serving control service RPCs; ControlService::Init() falls back to
 // CpuInfo::num_cores() when the value is not positive.
 DEFINE_int32(control_service_num_svc_threads, 0, "Number of threads for processing "
     "control service's RPCs. if left at default value 0, it will be set to number of "
     "CPU cores. Set it to a positive value to change from the default.");
 // Declared here, defined in another file; passed to DebugActionNoFail() by the RPC
 // handlers in this file.
 DECLARE_string(debug_actions);

 namespace impala {

 // Creates the memory tracker that accounts for this service's queued RPC payloads and
 // registers metrics for it. The limit is taken from --control_service_queue_mem_limit
 // and parsed by ParseUtil::ParseMemSpec() together with the process memory tracker's
 // limit; a non-positive result is reported through CLEAN_EXIT_WITH_ERROR.
 ControlService::ControlService(MetricGroup* metric_group)
   : ControlServiceIf(ExecEnv::GetInstance()->rpc_mgr()->metric_entity(),
         ExecEnv::GetInstance()->rpc_mgr()->result_tracker()) {
   MemTracker* const process_tracker = ExecEnv::GetInstance()->process_mem_tracker();
   // ParseMemSpec() needs this out-parameter; its value is ignored here.
   bool ignored_is_percent;
   const int64_t queue_bytes_limit = ParseUtil::ParseMemSpec(
       FLAGS_control_service_queue_mem_limit, &ignored_is_percent,
       process_tracker->limit());
   if (queue_bytes_limit <= 0) {
     CLEAN_EXIT_WITH_ERROR(Substitute(
         "Invalid mem limit for control service queue: '$0'.",
         FLAGS_control_service_queue_mem_limit));
   }
   // The new tracker is created as a child of the process-wide tracker.
   mem_tracker_.reset(
       new MemTracker(queue_bytes_limit, "Control Service Queue", process_tracker));
   MemTrackerMetric::CreateMetrics(metric_group, mem_tracker_.get(), "ControlService");
 }

 // Registers this service with the RpcMgr. The number of service threads is
 // --control_service_num_svc_threads when that flag is positive, otherwise
 // CpuInfo::num_cores(). Returns the status of the registration.
 Status ControlService::Init() {
   int service_thread_count;
   if (FLAGS_control_service_num_svc_threads > 0) {
     service_thread_count = FLAGS_control_service_num_svc_threads;
   } else {
     service_thread_count = CpuInfo::num_cores();
   }
   // The queue length is the largest 32-bit value; the effective capacity is bounded
   // by the memory consumed against 'mem_tracker_'.
   const int32_t max_queue_length = std::numeric_limits<int32_t>::max();
   const Status register_status = ExecEnv::GetInstance()->rpc_mgr()->RegisterService(
       service_thread_count, max_queue_length, this, mem_tracker_.get());
   if (!register_status.ok()) return register_status;
   return Status::OK();
 }

 // Asks the RpcMgr for a ControlServiceProxy to 'address'/'hostname' and stores it in
 // '*proxy'. Returns the RpcMgr's error if the proxy could not be obtained; otherwise
 // switches the proxy to the "control" network plane and returns OK.
 Status ControlService::GetProxy(const TNetworkAddress& address, const string& hostname,
     unique_ptr<ControlServiceProxy>* proxy) {
   auto* rpc_mgr = ExecEnv::GetInstance()->rpc_mgr();
   const Status create_status = rpc_mgr->GetProxy(address, hostname, proxy);
   if (!create_status.ok()) return create_status;
   // Control RPCs use a network plane separate from DataStreamService's so they are
   // not blocked behind large DataStreamService payloads.
   proxy->get()->set_network_plane("control");
   return Status::OK();
 }

 // Forwards the authorization decision for an incoming RPC to the RpcMgr, naming this
 // service "ControlService" and handing over its memory tracker. 'req' and 'resp' are
 // not used.
 bool ControlService::Authorize(const google::protobuf::Message* req,
     google::protobuf::Message* resp, RpcContext* context) {
   auto* rpc_mgr = ExecEnv::GetInstance()->rpc_mgr();
   MemTracker* const service_tracker = mem_tracker_.get();
   return rpc_mgr->Authorize("ControlService", context, service_tracker);
 }

 // Reads the inbound sidecar referenced by 'request.thrift_profiles_sidecar_idx()' and
 // deserializes it as a Thrift message into 'thrift_profiles'. Returns an error if the
 // "REPORT_EXEC_STATUS_PROFILE" debug action yields one, if the sidecar cannot be
 // fetched, or if deserialization fails.
 Status ControlService::GetProfile(const ReportExecStatusRequestPB& request,
     const ClientRequestState& request_state, RpcContext* rpc_context,
     TRuntimeProfileForest* thrift_profiles) {
   // Debug action which lets a deserialization failure be simulated.
   const Status debug_status =
       DebugAction(request_state.query_options(), "REPORT_EXEC_STATUS_PROFILE");
   if (!debug_status.ok()) return debug_status;

   const auto sidecar_idx = request.thrift_profiles_sidecar_idx();
   kudu::Slice profile_slice;
   KUDU_RETURN_IF_ERROR(rpc_context->GetInboundSidecar(sidecar_idx, &profile_slice),
       "Failed to get thrift profile sidecar");

   // DeserializeThriftMsg() takes the buffer length through a uint32_t pointer.
   uint32_t serialized_len = profile_slice.size();
   return DeserializeThriftMsg(
       profile_slice.data(), &serialized_len, true, thrift_profiles);
 }

 // Fetches inbound sidecar number 'sidecar_idx' from 'rpc_context' and deserializes its
 // bytes as a Thrift message into 'thrift_obj'. Returns an error if the sidecar cannot
 // be retrieved or if deserialization fails.
 template <typename T>
 static Status GetSidecar(int sidecar_idx, RpcContext* rpc_context, T* thrift_obj) {
   kudu::Slice raw_sidecar;
   KUDU_RETURN_IF_ERROR(
       rpc_context->GetInboundSidecar(sidecar_idx, &raw_sidecar), "Failed to get sidecar");
   // DeserializeThriftMsg() takes the buffer length through a uint32_t pointer.
   uint32_t serialized_len = raw_sidecar.size();
   return DeserializeThriftMsg(raw_sidecar.data(), &serialized_len, true, thrift_obj);
 }

 void ControlService::ExecQueryFInstances(const ExecQueryFInstancesRequestPB* request,
     ExecQueryFInstancesResponsePB* response, RpcContext* rpc_context) {
   DebugActionNoFail(FLAGS_debug_actions, "EXEC_QUERY_FINSTANCES_DELAY");
   DCHECK(request->has_coord_state_idx());
   DCHECK(request->has_plan_fragment_info_sidecar_idx());
   DCHECK(request->has_query_ctx_sidecar_idx());
   // Deserialize the sidecars. The QueryState will make a copy of the TQueryCtx and
   // TExecPlanFragmentInfo, so we can deallocate the deserialized values after
   // StartQuery(). TODO: can we avoid this extra copy?
   TExecPlanFragmentInfo fragment_info;
   const Status& fragment_info_sidecar_status =
       GetSidecar(request->plan_fragment_info_sidecar_idx(), rpc_context, &fragment_info);
   if (!fragment_info_sidecar_status.ok()) {
     RespondAndReleaseRpc(fragment_info_sidecar_status, response, rpc_context);
     return;
   }
   TQueryCtx query_ctx;
   const Status& query_ctx_sidecar_status =
       GetSidecar(request->query_ctx_sidecar_idx(), rpc_context, &query_ctx);
   if (!query_ctx_sidecar_status.ok()) {
     RespondAndReleaseRpc(query_ctx_sidecar_status, response, rpc_context);
     return;
   }
   ScopedThreadContext scoped_tdi(GetThreadDebugInfo(), query_ctx.query_id);
   VLOG_QUERY << "ExecQueryFInstances():"
              << " query_id=" << PrintId(query_ctx.query_id)
              << " coord=" << TNetworkAddressToString(query_ctx.coord_address)
              << " #instances=" << fragment_info.fragment_instance_ctxs.size();
   Status resp_status = ExecEnv::GetInstance()->query_exec_mgr()->StartQuery(
       request, query_ctx, fragment_info);
   if (!resp_status.ok()) {
     LOG(INFO) << "ExecQueryFInstances() failed: query_id=" << PrintId(query_ctx.query_id)
               << ": " << resp_status.GetDetail();
   }
   RespondAndReleaseRpc(resp_status, response, rpc_context);
 }

 void ControlService::ReportExecStatus(const ReportExecStatusRequestPB* request,
     ReportExecStatusResponsePB* response, RpcContext* rpc_context) {
   const TUniqueId query_id = ProtoToQueryId(request->query_id());
   shared_ptr<ClientRequestState> request_state =
       ExecEnv::GetInstance()->impala_server()->GetClientRequestState(query_id);

   // This failpoint is to allow jitter to be injected.
   DebugActionNoFail(FLAGS_debug_actions, "REPORT_EXEC_STATUS_DELAY");

   if (request_state.get() == nullptr) {
     // This is expected occasionally (since a report RPC might be in flight while
     // cancellation is happening). Return an error to the caller to get it to stop.
     const string& err = Substitute("ReportExecStatus(): Received report for unknown "
                                    "query ID (probably closed or cancelled): $0 "
                                    "remote host=$1",
         PrintId(query_id), rpc_context->remote_address().ToString());
     VLOG(1) << err;
     RespondAndReleaseRpc(Status::Expected(err), response, rpc_context);
     return;
   }

   // The runtime profile is sent as a Thrift serialized buffer via sidecar. Get the
   // sidecar and deserialize the thrift profile if there is any. The sender may have
   // failed to serialize the Thrift profile so an empty thrift profile is valid.
   // TODO: Fix IMPALA-7232 to indicate incomplete profile in this case.
   TRuntimeProfileForest thrift_profiles;
   if (LIKELY(request->has_thrift_profiles_sidecar_idx())) {
     const Status& profile_status =
         GetProfile(*request, *request_state.get(), rpc_context, &thrift_profiles);
     if (UNLIKELY(!profile_status.ok())) {
       LOG(ERROR) << Substitute("ReportExecStatus(): Failed to deserialize profile "
           "for query ID $0: $1", PrintId(request_state->query_id()),
           profile_status.GetDetail());
       // Do not expose a partially deserialized profile.
       TRuntimeProfileForest empty_profiles;
       swap(thrift_profiles, empty_profiles);
     }
   }

   Status resp_status = request_state->UpdateBackendExecStatus(*request, thrift_profiles);
   RespondAndReleaseRpc(resp_status, response, rpc_context);
 }

 // Writes 'status' into the status field of 'response', releases the RPC's transfer
 // size from 'mem_tracker_' and then completes the RPC with RespondSuccess(). Any error
 // is therefore carried in the response's status field.
 template <typename ResponsePBType>
 void ControlService::RespondAndReleaseRpc(
     const Status& status, ResponsePBType* response, RpcContext* rpc_context) {
   auto* status_pb = response->mutable_status();
   status.ToProto(status_pb);
   // Release the memory against the control service's memory tracker. The size is read
   // from 'rpc_context' before the RPC is responded to.
   const auto transfer_size = rpc_context->GetTransferSize();
   mem_tracker_->Release(transfer_size);
   rpc_context->RespondSuccess();
 }

 void ControlService::CancelQueryFInstances(const CancelQueryFInstancesRequestPB* request,
     CancelQueryFInstancesResponsePB* response, RpcContext* rpc_context) {
   DCHECK(request->has_query_id());
   const TUniqueId& query_id = ProtoToQueryId(request->query_id());
   VLOG_QUERY << "CancelQueryFInstances(): query_id=" << PrintId(query_id);
   // This failpoint is to allow jitter to be injected.
   DebugActionNoFail(FLAGS_debug_actions, "CANCEL_QUERY_FINSTANCES_DELAY");
   QueryState::ScopedRef qs(query_id);
   if (qs.get() == nullptr) {
     Status status(ErrorMsg(TErrorCode::INTERNAL_ERROR,
         Substitute("Unknown query id: $0", PrintId(query_id))));
     RespondAndReleaseRpc(status, response, rpc_context);
     return;
   }
   qs->Cancel();
   RespondAndReleaseRpc(Status::OK(), response, rpc_context);
 }

 void ControlService::RemoteShutdown(const RemoteShutdownParamsPB* req,
     RemoteShutdownResultPB* response, RpcContext* rpc_context) {
   // This failpoint is to allow jitter to be injected.
   DebugActionNoFail(FLAGS_debug_actions, "REMOTE_SHUTDOWN_DELAY");
   Status status = ExecEnv::GetInstance()->impala_server()->StartShutdown(
       req->has_deadline_s() ? req->deadline_s() : -1,
       response->mutable_shutdown_status());

   RespondAndReleaseRpc(status, response, rpc_context);
 }
 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#include "service/control-service.h"

	#include "common/constant-strings.h"
	#include "common/thread-debug-info.h"
	#include "exec/kudu-util.h"
	#include "kudu/rpc/rpc_context.h"
	#include "kudu/rpc/rpc_controller.h"
	#include "rpc/rpc-mgr.h"
	#include "rpc/rpc-mgr.inline.h"
	#include "runtime/coordinator.h"
	#include "runtime/exec-env.h"
	#include "runtime/mem-tracker.h"
	#include "runtime/query-exec-mgr.h"
	#include "runtime/query-state.h"
	#include "service/client-request-state.h"
	#include "service/impala-server.h"
	#include "util/debug-util.h"
	#include "util/memory-metrics.h"
	#include "util/parse-util.h"
	#include "util/uid-util.h"

	#include "gen-cpp/ImpalaInternalService_types.h"
	#include "gen-cpp/RuntimeProfile_types.h"
	#include "gen-cpp/control_service.pb.h"
	#include "gen-cpp/control_service.proxy.h"

	#include "common/names.h"

	using kudu::rpc::RpcContext;

	// Help text for --control_service_queue_mem_limit: a fixed prefix followed by
	// MEM_UNITS_HELP_MSG formatted with "the process memory limit". Defined ahead of the
	// DEFINE_string() below because that macro reads it through c_str().
	static const string QUEUE_LIMIT_MSG = "(Advanced) Limit on RPC payloads consumption for "
	"ControlService. " + Substitute(MEM_UNITS_HELP_MSG, "the process memory limit");
	// Memory limit spec for the control service queue (default "50MB"). The ControlService
	// constructor parses it with ParseUtil::ParseMemSpec().
	DEFINE_string(control_service_queue_mem_limit, "50MB", QUEUE_LIMIT_MSG.c_str());
	// Thread count for serving control service RPCs; ControlService::Init() falls back to
	// CpuInfo::num_cores() when the value is not positive.
	DEFINE_int32(control_service_num_svc_threads, 0, "Number of threads for processing "
	"control service's RPCs. if left at default value 0, it will be set to number of "
	"CPU cores. Set it to a positive value to change from the default.");
	// Declared here, defined in another file; passed to DebugActionNoFail() by the RPC
	// handlers in this file.
	DECLARE_string(debug_actions);

	namespace impala {

	// Creates the memory tracker that accounts for this service's queued RPC payloads and
	// registers metrics for it. The limit is taken from --control_service_queue_mem_limit
	// and parsed by ParseUtil::ParseMemSpec() together with the process memory tracker's
	// limit; a non-positive result is reported through CLEAN_EXIT_WITH_ERROR.
	ControlService::ControlService(MetricGroup* metric_group)
	  : ControlServiceIf(ExecEnv::GetInstance()->rpc_mgr()->metric_entity(),
	        ExecEnv::GetInstance()->rpc_mgr()->result_tracker()) {
	  MemTracker* const process_tracker = ExecEnv::GetInstance()->process_mem_tracker();
	  // ParseMemSpec() needs this out-parameter; its value is ignored here.
	  bool ignored_is_percent;
	  const int64_t queue_bytes_limit = ParseUtil::ParseMemSpec(
	      FLAGS_control_service_queue_mem_limit, &ignored_is_percent,
	      process_tracker->limit());
	  if (queue_bytes_limit <= 0) {
	    CLEAN_EXIT_WITH_ERROR(Substitute(
	        "Invalid mem limit for control service queue: '$0'.",
	        FLAGS_control_service_queue_mem_limit));
	  }
	  // The new tracker is created as a child of the process-wide tracker.
	  mem_tracker_.reset(
	      new MemTracker(queue_bytes_limit, "Control Service Queue", process_tracker));
	  MemTrackerMetric::CreateMetrics(metric_group, mem_tracker_.get(), "ControlService");
	}

	// Registers this service with the RpcMgr. The number of service threads is
	// --control_service_num_svc_threads when that flag is positive, otherwise
	// CpuInfo::num_cores(). Returns the status of the registration.
	Status ControlService::Init() {
	  int service_thread_count;
	  if (FLAGS_control_service_num_svc_threads > 0) {
	    service_thread_count = FLAGS_control_service_num_svc_threads;
	  } else {
	    service_thread_count = CpuInfo::num_cores();
	  }
	  // The queue length is the largest 32-bit value; the effective capacity is bounded
	  // by the memory consumed against 'mem_tracker_'.
	  const int32_t max_queue_length = std::numeric_limits<int32_t>::max();
	  const Status register_status = ExecEnv::GetInstance()->rpc_mgr()->RegisterService(
	      service_thread_count, max_queue_length, this, mem_tracker_.get());
	  if (!register_status.ok()) return register_status;
	  return Status::OK();
	}

	// Asks the RpcMgr for a ControlServiceProxy to 'address'/'hostname' and stores it in
	// '*proxy'. Returns the RpcMgr's error if the proxy could not be obtained; otherwise
	// switches the proxy to the "control" network plane and returns OK.
	Status ControlService::GetProxy(const TNetworkAddress& address, const string& hostname,
	    unique_ptr<ControlServiceProxy>* proxy) {
	  auto* rpc_mgr = ExecEnv::GetInstance()->rpc_mgr();
	  const Status create_status = rpc_mgr->GetProxy(address, hostname, proxy);
	  if (!create_status.ok()) return create_status;
	  // Control RPCs use a network plane separate from DataStreamService's so they are
	  // not blocked behind large DataStreamService payloads.
	  proxy->get()->set_network_plane("control");
	  return Status::OK();
	}

	// Forwards the authorization decision for an incoming RPC to the RpcMgr, naming this
	// service "ControlService" and handing over its memory tracker. 'req' and 'resp' are
	// not used.
	bool ControlService::Authorize(const google::protobuf::Message* req,
	    google::protobuf::Message* resp, RpcContext* context) {
	  auto* rpc_mgr = ExecEnv::GetInstance()->rpc_mgr();
	  MemTracker* const service_tracker = mem_tracker_.get();
	  return rpc_mgr->Authorize("ControlService", context, service_tracker);
	}

	// Reads the inbound sidecar referenced by 'request.thrift_profiles_sidecar_idx()' and
	// deserializes it as a Thrift message into 'thrift_profiles'. Returns an error if the
	// "REPORT_EXEC_STATUS_PROFILE" debug action yields one, if the sidecar cannot be
	// fetched, or if deserialization fails.
	Status ControlService::GetProfile(const ReportExecStatusRequestPB& request,
	    const ClientRequestState& request_state, RpcContext* rpc_context,
	    TRuntimeProfileForest* thrift_profiles) {
	  // Debug action which lets a deserialization failure be simulated.
	  const Status debug_status =
	      DebugAction(request_state.query_options(), "REPORT_EXEC_STATUS_PROFILE");
	  if (!debug_status.ok()) return debug_status;

	  const auto sidecar_idx = request.thrift_profiles_sidecar_idx();
	  kudu::Slice profile_slice;
	  KUDU_RETURN_IF_ERROR(rpc_context->GetInboundSidecar(sidecar_idx, &profile_slice),
	      "Failed to get thrift profile sidecar");

	  // DeserializeThriftMsg() takes the buffer length through a uint32_t pointer.
	  uint32_t serialized_len = profile_slice.size();
	  return DeserializeThriftMsg(
	      profile_slice.data(), &serialized_len, true, thrift_profiles);
	}

	// Fetches inbound sidecar number 'sidecar_idx' from 'rpc_context' and deserializes its
	// bytes as a Thrift message into 'thrift_obj'. Returns an error if the sidecar cannot
	// be retrieved or if deserialization fails.
	template <typename T>
	static Status GetSidecar(int sidecar_idx, RpcContext* rpc_context, T* thrift_obj) {
	  kudu::Slice raw_sidecar;
	  KUDU_RETURN_IF_ERROR(
	      rpc_context->GetInboundSidecar(sidecar_idx, &raw_sidecar), "Failed to get sidecar");
	  // DeserializeThriftMsg() takes the buffer length through a uint32_t pointer.
	  uint32_t serialized_len = raw_sidecar.size();
	  return DeserializeThriftMsg(raw_sidecar.data(), &serialized_len, true, thrift_obj);
	}

	void ControlService::ExecQueryFInstances(const ExecQueryFInstancesRequestPB* request,
	ExecQueryFInstancesResponsePB* response, RpcContext* rpc_context) {
	DebugActionNoFail(FLAGS_debug_actions, "EXEC_QUERY_FINSTANCES_DELAY");
	DCHECK(request->has_coord_state_idx());
	DCHECK(request->has_plan_fragment_info_sidecar_idx());
	DCHECK(request->has_query_ctx_sidecar_idx());
	// Deserialize the sidecars. The QueryState will make a copy of the TQueryCtx and
	// TExecPlanFragmentInfo, so we can deallocate the deserialized values after
	// StartQuery(). TODO: can we avoid this extra copy?
	TExecPlanFragmentInfo fragment_info;
	const Status& fragment_info_sidecar_status =
	GetSidecar(request->plan_fragment_info_sidecar_idx(), rpc_context, &fragment_info);
	if (!fragment_info_sidecar_status.ok()) {
	RespondAndReleaseRpc(fragment_info_sidecar_status, response, rpc_context);
	return;
	}
	TQueryCtx query_ctx;
	const Status& query_ctx_sidecar_status =
	GetSidecar(request->query_ctx_sidecar_idx(), rpc_context, &query_ctx);
	if (!query_ctx_sidecar_status.ok()) {
	RespondAndReleaseRpc(query_ctx_sidecar_status, response, rpc_context);
	return;
	}
	ScopedThreadContext scoped_tdi(GetThreadDebugInfo(), query_ctx.query_id);
	VLOG_QUERY << "ExecQueryFInstances():"
	<< " query_id=" << PrintId(query_ctx.query_id)
	<< " coord=" << TNetworkAddressToString(query_ctx.coord_address)
	<< " #instances=" << fragment_info.fragment_instance_ctxs.size();
	Status resp_status = ExecEnv::GetInstance()->query_exec_mgr()->StartQuery(
	request, query_ctx, fragment_info);
	if (!resp_status.ok()) {
	LOG(INFO) << "ExecQueryFInstances() failed: query_id=" << PrintId(query_ctx.query_id)
	<< ": " << resp_status.GetDetail();
	}
	RespondAndReleaseRpc(resp_status, response, rpc_context);
	}

	// RPC handler for an exec status report. Looks up the ClientRequestState of the
	// reported query, deserializes the optional runtime profile sidecar and hands both to
	// ClientRequestState::UpdateBackendExecStatus(), whose status is sent back. If the
	// query is unknown, an "expected" error is sent back instead. The RPC is always
	// completed via RespondAndReleaseRpc().
	void ControlService::ReportExecStatus(const ReportExecStatusRequestPB* request,
	    ReportExecStatusResponsePB* response, RpcContext* rpc_context) {
	  const TUniqueId query_id = ProtoToQueryId(request->query_id());
	  shared_ptr<ClientRequestState> request_state =
	      ExecEnv::GetInstance()->impala_server()->GetClientRequestState(query_id);

	  // This failpoint is to allow jitter to be injected.
	  DebugActionNoFail(FLAGS_debug_actions, "REPORT_EXEC_STATUS_DELAY");

	  if (request_state.get() == nullptr) {
	    // This is expected occasionally (since a report RPC might be in flight while
	    // cancellation is happening). Return an error to the caller to get it to stop.
	    const string& err = Substitute("ReportExecStatus(): Received report for unknown "
	                                   "query ID (probably closed or cancelled): $0 "
	                                   "remote host=$1",
	        PrintId(query_id), rpc_context->remote_address().ToString());
	    VLOG(1) << err;
	    RespondAndReleaseRpc(Status::Expected(err), response, rpc_context);
	    return;
	  }

	  // The runtime profile is sent as a Thrift serialized buffer via sidecar. Get the
	  // sidecar and deserialize the thrift profile if there is any. The sender may have
	  // failed to serialize the Thrift profile so an empty thrift profile is valid.
	  // TODO: Fix IMPALA-7232 to indicate incomplete profile in this case.
	  TRuntimeProfileForest thrift_profiles;
	  if (LIKELY(request->has_thrift_profiles_sidecar_idx())) {
	    // GetProfile() takes the request and the request state by const reference, so
	    // both pointers are dereferenced; 'request_state' is known to be non-null here.
	    const Status& profile_status =
	        GetProfile(*request, *request_state, rpc_context, &thrift_profiles);
	    if (UNLIKELY(!profile_status.ok())) {
	      LOG(ERROR) << Substitute("ReportExecStatus(): Failed to deserialize profile "
	          "for query ID $0: $1", PrintId(request_state->query_id()),
	          profile_status.GetDetail());
	      // Do not expose a partially deserialized profile.
	      TRuntimeProfileForest empty_profiles;
	      swap(thrift_profiles, empty_profiles);
	    }
	  }

	  Status resp_status = request_state->UpdateBackendExecStatus(*request, thrift_profiles);
	  RespondAndReleaseRpc(resp_status, response, rpc_context);
	}

	// Writes 'status' into the status field of 'response', releases the RPC's transfer
	// size from 'mem_tracker_' and then completes the RPC with RespondSuccess(). Any error
	// is therefore carried in the response's status field.
	template <typename ResponsePBType>
	void ControlService::RespondAndReleaseRpc(
	    const Status& status, ResponsePBType* response, RpcContext* rpc_context) {
	  auto* status_pb = response->mutable_status();
	  status.ToProto(status_pb);
	  // Release the memory against the control service's memory tracker. The size is read
	  // from 'rpc_context' before the RPC is responded to.
	  const auto transfer_size = rpc_context->GetTransferSize();
	  mem_tracker_->Release(transfer_size);
	  rpc_context->RespondSuccess();
	}

	void ControlService::CancelQueryFInstances(const CancelQueryFInstancesRequestPB* request,
	CancelQueryFInstancesResponsePB* response, RpcContext* rpc_context) {
	DCHECK(request->has_query_id());
	const TUniqueId& query_id = ProtoToQueryId(request->query_id());
	VLOG_QUERY << "CancelQueryFInstances(): query_id=" << PrintId(query_id);
	// This failpoint is to allow jitter to be injected.
	DebugActionNoFail(FLAGS_debug_actions, "CANCEL_QUERY_FINSTANCES_DELAY");
	QueryState::ScopedRef qs(query_id);
	if (qs.get() == nullptr) {
	Status status(ErrorMsg(TErrorCode::INTERNAL_ERROR,
	Substitute("Unknown query id: $0", PrintId(query_id))));
	RespondAndReleaseRpc(status, response, rpc_context);
	return;
	}
	qs->Cancel();
	RespondAndReleaseRpc(Status::OK(), response, rpc_context);
	}

	void ControlService::RemoteShutdown(const RemoteShutdownParamsPB* req,
	RemoteShutdownResultPB* response, RpcContext* rpc_context) {
	// This failpoint is to allow jitter to be injected.
	DebugActionNoFail(FLAGS_debug_actions, "REMOTE_SHUTDOWN_DELAY");
	Status status = ExecEnv::GetInstance()->impala_server()->StartShutdown(
	req->has_deadline_s() ? req->deadline_s() : -1,
	response->mutable_shutdown_status());

	RespondAndReleaseRpc(status, response, rpc_context);
	}
	}