DRILL-7643: Fix issues with columns whose names match reserved keywords
closes #2028
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java
index feeeab7..1f97399 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java
@@ -51,7 +51,7 @@
 * Returns actual column name obtained from intermediate name which includes statistics kind and other analyze-specific info.
* <p>
* Example: column which corresponds to max statistics value for {@code `o_shippriority`} column is {@code column$maxValue$`o_shippriority`}.
- * This method will return actual column name: {@code `o_shippriority`}.
+ * This method will return the escaped actual column name: {@code `o_shippriority`}.
*
* @param fullName the source of actual column name
* @return actual column name
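For context, a rough sketch of the convention described in the javadoc above (illustrative only; the column name, class name, and prefix-stripping logic here are assumptions, not the actual AnalyzeColumnUtils code):

```java
import org.apache.drill.common.expression.SchemaPath;

public class EscapedNameSketch {
  public static void main(String[] args) {
    // intermediate name: statistics kind plus the escaped column name (example value)
    String fullName = "column$maxValue$`o_shippriority`";
    // strip the statistics prefix, keeping the escaped column name
    String escapedName = fullName.substring(fullName.lastIndexOf('$') + 1); // "`o_shippriority`"
    // parseFromString() interprets the backquotes instead of keeping them in the name
    SchemaPath actualColumn = SchemaPath.parseFromString(escapedName);
    System.out.println(actualColumn.getRootSegmentPath());                  // o_shippriority
  }
}
```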
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/FileMetadataInfoCollector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/FileMetadataInfoCollector.java
index d9765d6..cc156f0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/FileMetadataInfoCollector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/FileMetadataInfoCollector.java
@@ -128,6 +128,7 @@
List<SchemaPath> metastoreInterestingColumns =
Optional.ofNullable(basicRequests.interestingColumnsAndPartitionKeys(tableInfo).interestingColumns())
.map(metastoreInterestingColumnNames -> metastoreInterestingColumnNames.stream()
+ // interesting column names are escaped, so SchemaPath.parseFromString() should be used here
.map(SchemaPath::parseFromString)
.collect(Collectors.toList()))
.orElse(null);
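A quick illustration of why the comment insists on SchemaPath.parseFromString() here (the column name `date` is just an assumed example): getSimplePath() keeps the backquotes as literal characters of the name, so only the parsed form matches the column recorded in the table schema.

```java
import org.apache.drill.common.expression.SchemaPath;

public class ParseVsSimplePathSketch {
  public static void main(String[] args) {
    SchemaPath parsed = SchemaPath.parseFromString("`date`");
    SchemaPath simple = SchemaPath.getSimplePath("`date`");
    System.out.println(parsed.getRootSegmentPath()); // date
    System.out.println(simple.getRootSegmentPath()); // `date` (backquotes kept -- wrong column)
  }
}
```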
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java
index 7b6a19a..a2607a3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java
@@ -148,7 +148,11 @@
if (columnList != null) {
for (SqlNode column : columnList.getList()) {
// Add only the root segment. Collect metadata for all the columns under that root segment
- columnSet.add(SchemaPath.getSimplePath(SchemaPath.parseFromString(column.toString()).getRootSegmentPath()));
+ columnSet.add(
+ SchemaPath.getSimplePath(
+ SchemaPath.parseFromString(
+ column.toSqlString(null, true).getSql())
+ .getRootSegmentPath()));
}
}
return columnSet;
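The motivation for replacing SqlNode.toString() with toSqlString(null, true).getSql(): SqlIdentifier.toString() joins the name segments without quoting, so a column named date comes back as a bare keyword that the subsequent SchemaPath.parseFromString() call can misparse, while the dialect-based rendering keeps identifier quoting enabled. A minimal sketch (the exact quote character depends on the default dialect, and the column name is an assumed example):

```java
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.parser.SqlParserPos;

public class ToSqlStringSketch {
  public static void main(String[] args) {
    // a column whose name collides with a SQL keyword
    SqlIdentifier column = new SqlIdentifier("date", SqlParserPos.ZERO);
    System.out.println(column.toString());                       // date (unquoted)
    System.out.println(column.toSqlString(null, true).getSql()); // quoted form, e.g. `date`
  }
}
```

The same substitution is applied in SqlMetastoreAnalyzeTable below.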
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlMetastoreAnalyzeTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlMetastoreAnalyzeTable.java
index a2bf8a9..a9ab526 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlMetastoreAnalyzeTable.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/SqlMetastoreAnalyzeTable.java
@@ -127,8 +127,7 @@
}
return fieldList.getList().stream()
- .map(SqlNode::toString)
- .map(SchemaPath::parseFromString)
+ .map(sqlNode -> SchemaPath.parseFromString(sqlNode.toSqlString(null, true).getSql()))
.collect(Collectors.toList());
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/StatisticsCollectorImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/StatisticsCollectorImpl.java
index f2dcac9..96ef51b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/StatisticsCollectorImpl.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/StatisticsCollectorImpl.java
@@ -240,6 +240,7 @@
}
switch (nextField) {
case Statistic.COLNAME:
+ // column name is escaped, so SchemaPath.parseFromString() should be used here
((DrillStatsTable.ColumnStatistics_v1) columnStatistics).setName(SchemaPath.parseFromString(reader.readText().toString()));
break;
case Statistic.COLTYPE:
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java
index a672d19..da4f474 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/RecordCollector.java
@@ -23,6 +23,7 @@
import org.apache.calcite.schema.Schema;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.schema.Table;
+import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.ExecConstants;
@@ -318,7 +319,7 @@
* @param schemaPath schema name
* @param table table instance
* @param schema table or column schema
- * @param parentColumnName parent column name if any
+ * @param parentColumnNames list of parent column names if any
* @param columnIndex column index if any
* @param isNested indicates if column is nested
* @return list of column records
@@ -326,28 +327,31 @@
private List<Records.Column> columns(String schemaPath,
BaseTableMetadata table,
TupleMetadata schema,
- String parentColumnName,
+ List<String> parentColumnNames,
int columnIndex,
boolean isNested) {
List<Records.Column> records = new ArrayList<>();
schema.toMetadataList().forEach(
column -> {
- // concat parent column name to use full column name, i.e. struct_col.nested_col
- String columnName = parentColumnName == null ? column.name() : parentColumnName + "." + column.name();
+ List<String> columnNames = CollectionUtils.isEmpty(parentColumnNames) ? new ArrayList<>() : new ArrayList<>(parentColumnNames);
+ columnNames.add(column.name());
// nested columns have the same index as their parent
int currentIndex = columnIndex == UNDEFINED_INDEX ? schema.index(column.name()) : columnIndex;
// if column is a map / struct, recursively scan nested columns
if (column.isMap()) {
List<Records.Column> mapRecords =
- columns(schemaPath, table, column.tupleSchema(), columnName, currentIndex, true);
+ columns(schemaPath, table, column.tupleSchema(), columnNames, currentIndex, true);
records.addAll(mapRecords);
}
String tableName = table.getTableInfo().name();
- if (filterEvaluator.shouldVisitColumn(schemaPath, tableName, columnName)) {
+
+      // concat parent column names to build the full column name, e.g. struct_col.nested_col
+ String columnPath = String.join(".", columnNames);
+ if (filterEvaluator.shouldVisitColumn(schemaPath, tableName, columnPath)) {
ColumnStatistics<?> columnStatistics =
- table.getColumnStatistics(SchemaPath.parseFromString(columnName));
- records.add(new Records.Column(IS_CATALOG_NAME, schemaPath, tableName, columnName,
+ table.getColumnStatistics(SchemaPath.getCompoundPath(columnNames.toArray(new String[0])));
+ records.add(new Records.Column(IS_CATALOG_NAME, schemaPath, tableName, columnPath,
column, columnStatistics, currentIndex, isNested));
}
});
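The list-based rewrite separates the display name from the statistics lookup key. A short sketch of the distinction (column names are assumed examples): the dotted string is only used for the INFORMATION_SCHEMA output, while SchemaPath.getCompoundPath() takes the segments verbatim, so a nested column whose name is a reserved keyword no longer needs to be escaped and re-parsed.

```java
import java.util.Arrays;
import java.util.List;
import org.apache.drill.common.expression.SchemaPath;

public class CompoundPathSketch {
  public static void main(String[] args) {
    // a struct column with a nested field whose name is a reserved keyword
    List<String> columnNames = Arrays.asList("struct_col", "date");
    // display form, as shown in INFORMATION_SCHEMA.COLUMNS
    String columnPath = String.join(".", columnNames);                        // struct_col.date
    // lookup key for column statistics: segments are taken as-is, nothing is parsed
    SchemaPath lookup = SchemaPath.getCompoundPath(columnNames.toArray(new String[0]));
    System.out.println(columnPath + " -> " + lookup);
  }
}
```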
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
index 9c63da5..92472cf 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
@@ -3227,21 +3227,21 @@
public void testAnalyzeEmptyRequiredParquetTable() throws Exception {
String tableName = "analyze_empty_simple_required";
- run("create table dfs.tmp.%s as select 1 as id, 'a' as name from (values(1)) where 1 = 2", tableName);
+ run("create table dfs.tmp.%s as select 1 as `date`, 'a' as name from (values(1)) where 1 = 2", tableName);
File table = new File(dirTestWatcher.getDfsTestTmpDir(), tableName);
TableInfo tableInfo = getTableInfo(tableName, "tmp");
TupleMetadata schema = new SchemaBuilder()
- .add("id", TypeProtos.MinorType.INT)
+ .add("date", TypeProtos.MinorType.INT)
.add("name", TypeProtos.MinorType.VARCHAR)
.build();
Map<SchemaPath, ColumnStatistics<?>> columnStatistics = ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
.put(SchemaPath.getSimplePath("name"),
getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
- .put(SchemaPath.getSimplePath("id"),
+ .put(SchemaPath.getSimplePath("date"),
getColumnStatistics(null, null, 0L, TypeProtos.MinorType.INT))
.build();
@@ -3300,6 +3300,14 @@
.rowGroupsMetadata(tableInfo, (String) null, null);
assertEquals(1, rowGroupsMetadata.size());
+
+ testBuilder()
+ .sqlQuery("select COLUMN_NAME from INFORMATION_SCHEMA.`COLUMNS` where table_name='%s'", tableName)
+ .unOrdered()
+ .baselineColumns("COLUMN_NAME")
+ .baselineValues("date")
+ .baselineValues("name")
+ .go();
} finally {
run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
run("drop table if exists dfs.tmp.`%s`", tableName);
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
index e7e9c0c..2ecc3b5 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
@@ -944,9 +944,9 @@
@Test
public void testRefreshWithColumns() throws Exception {
- test("refresh table metadata columns (o_custkey, o_orderdate) dfs.`%s`", TABLE_NAME_1);
+ test("refresh table metadata columns (`date`, o_orderdate) dfs.`%s`", TABLE_NAME_1);
checkForMetadataFile(TABLE_NAME_1);
- String query = String.format("select dir0, dir1, o_custkey, o_orderdate from dfs.`%s` " +
+ String query = String.format("select dir0, dir1, o_custkey as `date`, o_orderdate from dfs.`%s` " +
" where dir0=1994 and dir1 in ('Q1', 'Q2')", TABLE_NAME_1);
int expectedRowCount = 20;
int actualRowCount = testSql(query);