IMPALA-9132: Explain statements should not cause a nullptr dereference
in LogLineageRecord()
For DDLs, LogLineageRecord() adds certain fields in the backend before
flushing the lineage. It uses ddl_exec_response() to get these fields.
However, explain is a special kind of DDL that does not have an
associated catalog_op_executor_. As a result, explain statements hit a
null pointer dereference when ddl_exec_response() is called.
Currently, tools like Atlas do not track lineage for explain
statements, so this change skips lineage logging for explain statements.
It also adds a general nullptr check for catalog_op_executor_.
Testing:
Added a test to verify lineage is not created for explain statements.
Change-Id: Iccc20fd5a80841c820ebeb4edffccebea30df76e
Reviewed-on: http://gerrit.cloudera.org:8080/14646
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc
index a24671d..13bb433 100644
--- a/be/src/service/client-request-state.cc
+++ b/be/src/service/client-request-state.cc
@@ -1536,7 +1536,8 @@
Status ClientRequestState::LogLineageRecord() {
const TExecRequest& request = exec_request();
- if (!request.__isset.query_exec_request && !request.__isset.catalog_op_request) {
+ if (request.stmt_type == TStmtType::EXPLAIN || (!request.__isset.query_exec_request &&
+ !request.__isset.catalog_op_request)) {
return Status::OK();
}
TLineageGraph lineage_graph;
@@ -1550,7 +1551,7 @@
return Status::OK();
}
- if (catalog_op_type() == TCatalogOpType::DDL) {
+ if (catalog_op_executor_ != nullptr && catalog_op_type() == TCatalogOpType::DDL) {
const TDdlExecResponse* response = ddl_exec_response();
//Set table location in the lineage graph. Currently, this is only set for external
// tables in frontend.
diff --git a/tests/custom_cluster/test_lineage.py b/tests/custom_cluster/test_lineage.py
index 73f785d..8887632 100644
--- a/tests/custom_cluster/test_lineage.py
+++ b/tests/custom_cluster/test_lineage.py
@@ -132,6 +132,23 @@
assert lineage_json["queryText"] == query
assert lineage_json["tableLocation"] is not None
+    # Explain statements must not create lineage: no record may carry this query id.
+    query = "explain create table {0}.lineage_test_tbl as select int_col, " \
+        "tinyint_col from functional.alltypes".format(unique_database)
+    result = self.execute_query_expect_success(self.client, query)
+    profile_query_id = re.search(r"Query \(id=(.*)\):", result.runtime_profile).group(1)
+
+    # Wait to flush the lineage log files.
+    time.sleep(3)
+
+    for log_filename in os.listdir(self.DDL_LINEAGE_LOG_DIR):
+      log_path = os.path.join(self.DDL_LINEAGE_LOG_DIR, log_filename)
+      # Only the coordinator's log file will be populated.
+      if os.path.getsize(log_path) > 0:
+        with open(log_path) as log_file:
+          lineage_json = json.load(log_file)
+          assert lineage_json["queryId"] != profile_query_id
+
@SkipIfABFS.hbase
@SkipIfADLS.hbase
@SkipIfS3.hbase