blob: b75b5a86605ea16f48a54ca394eb7f3ee1042dd2 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_SERVICE_CHILD_QUERY_H
#define IMPALA_SERVICE_CHILD_QUERY_H
#include <string>
#include <boost/thread/mutex.hpp>
#include "common/status.h"
#include "impala-server.h"
#include "gen-cpp/TCLIService_types.h"
namespace impala {
class ImpalaServer;
/// Child queries are used for implementing statements that consist of one or several
/// query statements (e.g., compute stats) that require independent query handles for
/// fetching results. Such queries are 'children' of a parent exec state in the sense
/// that they are executed in the same session and that child queries are cancelled if
/// the parent is cancelled (but not necessarily vice versa).
/// For simplicity and consistency, child queries are always executed via HiveServer2,
/// regardless of whether the parent session is Beeswax or HiveServer2.
//
/// Parent queries are expected to call ExecAndFetch() of a child query in a
/// separate thread, and then join that thread to wait for child-query completion.
/// The parent ClientRequestState is independent of the child query's ClientRequestState,
/// with the exception that the child query selectively checks the parent's status
/// for failure/cancellation detection. Child queries should never call into their
/// parent's ClientRequestState to avoid deadlock.
//
/// TODO: Compute stats is the only stmt that requires child queries. Once the
/// CatalogService performs background stats gathering the concept of child queries
/// will likely become obsolete. Remove this class and all child-query related code.
class ChildQuery {
public:
ChildQuery(const std::string& query, ClientRequestState* parent_request_state,
ImpalaServer* parent_server, RuntimeProfile* profile, ObjectPool* profile_pool)
: query_(query),
parent_request_state_(parent_request_state),
parent_server_(parent_server),
profile_(profile),
profile_pool_(profile_pool),
is_running_(false),
is_cancelled_(false) {
DCHECK(!query_.empty());
DCHECK(parent_request_state_ != NULL);
DCHECK(parent_server_ != NULL);
}
/// Allow child queries to be added to std collections.
/// (boost::mutex's operator= and copy c'tor are private)
ChildQuery(const ChildQuery& other)
: query_(other.query_),
parent_request_state_(other.parent_request_state_),
parent_server_(other.parent_server_),
profile_(other.profile_),
profile_pool_(other.profile_pool_),
is_running_(other.is_running_),
is_cancelled_(other.is_cancelled_) {}
/// Allow child queries to be added to std collections.
/// (boost::mutex's operator= and copy c'tor are private)
ChildQuery& operator=(const ChildQuery& other) {
query_ = other.query_;
parent_request_state_ = other.parent_request_state_;
parent_server_ = other.parent_server_;
is_running_ = other.is_running_;
is_cancelled_ = other.is_cancelled_;
return *this;
}
/// Executes this child query through HiveServer2 and fetches all its results.
Status ExecAndFetch();
/// Cancels and closes the given child query if it is running. Sets is_cancelled_.
/// Child queries can be cancelled by the parent query through ClientRequestState::Cancel().
/// Child queries should never cancel their parent to avoid deadlock (but the parent
/// query may decide to cancel itself based on a non-OK status from a child query).
/// Note that child queries have a different ClientRequestState than their parent query,
/// so cancellation of a child query does not call into the parent's ClientRequestState.
void Cancel();
const apache::hive::service::cli::thrift::TTableSchema& result_schema() {
return meta_resp_.schema;
}
const apache::hive::service::cli::thrift::TRowSet& result_data() {
return fetch_resp_.results;
}
/// The key in the HS2 conf overlay which indicates to the executing ImpalaServer that
/// this query is a child query.
static const string PARENT_QUERY_OPT;
private:
/// Sets the query options from the parent query in child's HS2 request.
/// TODO: Consider moving this function into a more appropriate place.
void SetQueryOptions(const TQueryOptions& parent_options,
apache::hive::service::cli::thrift::TExecuteStatementReq* exec_stmt_req);
/// Returns Status::Cancelled if this child query has been cancelled, otherwise OK.
/// Acquires lock_.
Status IsCancelled();
/// SQL string to be executed.
std::string query_;
/// Execution state of parent query. Used to synchronize and propagate parent
/// cancellations/failures to this child query. Not owned.
ClientRequestState* parent_request_state_;
/// Parent Impala server used for executing this child query. Not owned.
ImpalaServer* parent_server_;
/// The profile for the query is retrieved after Close() and added as a child.
RuntimeProfile* profile_;
ObjectPool* profile_pool_;
/// Result metadata and result rows of query.
apache::hive::service::cli::thrift::TGetResultSetMetadataResp meta_resp_;
apache::hive::service::cli::thrift::TFetchResultsResp fetch_resp_;
/// HS2 query handle. Set in ExecChildQuery().
apache::hive::service::cli::thrift::TOperationHandle hs2_handle_;
/// Protects is_running_ and is_cancelled_ to ensure idempotent cancellations.
boost::mutex lock_;
/// Indicates whether this query is running. False if the query has not started yet
/// or if the query has finished either successfully or because of an error.
bool is_running_;
/// Indicates whether this child query has been cancelled. Set in Cancel().
bool is_cancelled_;
};
/// Asynchronously executes a set of child queries in a separate thread.
///
/// ExecAsync() is called at most once per executor to execute a set of child queries
/// asynchronously. After ExecAsync() is called, either WaitForAll() or Cancel() must be
/// called to ensure that the child queries are no longer executing before destroying the
/// object.
class ChildQueryExecutor {
public:
ChildQueryExecutor();
/// Per the contract documented on 'is_running_', the executor thread must no longer be
/// running when the object is destroyed: once ExecAsync() has started execution,
/// WaitForAll() or Cancel() must have been called first.
~ChildQueryExecutor();
/// Asynchronously executes 'child_queries' one by one in a new thread. 'child_queries'
/// must be non-empty. May clear or modify the 'child_queries' arg. Can only be called
/// once. Does nothing if Cancel() was already called.
Status ExecAsync(std::vector<ChildQuery>&& child_queries) WARN_UNUSED_RESULT;
/// Waits for all child queries to complete successfully or with an error. Returns a
/// non-OK status if a child query fails. Returns OK if ExecAsync() was not called,
/// Cancel() was called before an error occurred, or if all child queries finished
/// successfully. If returning OK, populates 'completed_queries' with the completed
/// queries. Any returned ChildQueries remain owned by the executor. Should not be
/// called concurrently with ExecAsync(). After WaitForAll() returns, the object can
/// safely be destroyed.
Status WaitForAll(std::vector<ChildQuery*>* completed_queries);
/// Cancels all child queries and prevents any more from starting. Returns once all
/// child queries are cancelled, after which the object can safely be destroyed. Can
/// be safely called concurrently with ExecAsync() or WaitForAll().
void Cancel();
private:
/// Serially executes the queries in child_queries_ by calling the child query's
/// ExecAndFetch(). This function blocks until all queries complete and is run
/// in 'child_queries_thread_'.
/// Sets 'child_queries_status_'.
void ExecChildQueries();
/// Protects all fields below.
/// Should not be held at the same time as 'ChildQuery::lock_'.
SpinLock lock_;
/// True if cancellation of child queries has been initiated and no more child queries
/// should be started.
bool is_cancelled_;
/// True if 'child_queries_thread_' is in the process of executing child queries.
/// Set to false by 'child_queries_thread_' just before it exits. 'is_running_' must
/// be false when ChildQueryExecutor is destroyed: once execution is started,
/// WaitForAll() or Cancel() must be called to ensure the thread exits.
bool is_running_;
/// List of child queries to be executed. Not modified after it is initially populated,
/// so safe to read without holding 'lock_' if 'is_running_' or 'is_cancelled_' is
/// true, or 'child_queries_thread_' is non-NULL.
std::vector<ChildQuery> child_queries_;
/// Thread to execute 'child_queries_' in. Immutable after the first time it is set or
/// after 'is_cancelled_' is true.
std::unique_ptr<Thread> child_queries_thread_;
/// The status of the child queries. The status is OK iff all child queries complete
/// successfully. Otherwise, status contains the error of the first child query that
/// failed (child queries are executed serially and abort on the first error).
/// Immutable after 'child_queries_thread_' exits.
Status child_queries_status_;
};
}
#endif