DRILL-8401: Skip nested MAP column without children when creating parquet tables (#2757)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
index 5858b08..d58747c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
@@ -253,6 +253,7 @@
private void newSchema() {
List<Type> types = new ArrayList<>();
for (MaterializedField field : batchSchema) {
+ pruneUnsupported(field);
if (!supportsField(field)) {
continue;
}
@@ -298,6 +299,21 @@
setUp(schema, consumer);
}
+ /**
+  * Recursively prunes unsupported descendants (such as childless MAPs) from
+  * the given field's tree. The traversal is depth first: a child's own
+  * descendants are pruned before the child is tested with
+  * {@link #supportsField(MaterializedField)}, so a child that only becomes
+  * unsupported once its descendants are removed is itself removed too.
+  * <p>
+  * The field passed in is never removed by this method; only its descendants
+  * are. The caller is expected to test the field itself afterwards.
+  *
+  * @param field the root of the (sub)tree to prune; top level fields for the
+  *              initial call, nested fields for the recursive calls.
+  */
+ private void pruneUnsupported(MaterializedField field) {
+   // Iterate over a snapshot of the children: removeChild() mutates the
+   // field's child collection, and if getChildren() exposes that collection
+   // directly, removing while iterating it would raise a
+   // ConcurrentModificationException.
+   for (MaterializedField child : new ArrayList<>(field.getChildren())) {
+     pruneUnsupported(child);
+     if (!supportsField(child)) {
+       field.removeChild(child);
+     }
+   }
+ }
+
@Override
public boolean supportsField(MaterializedField field) {
return super.supportsField(field)
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
index 707a552..002edf2 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
@@ -1515,20 +1515,27 @@
}
}
- @Test
+ @Test // DRILL-8272, nested empty maps covered for DRILL-8401
public void testResultWithEmptyMap() throws Exception {
String fileName = "emptyMap.json";
- FileUtils.writeStringToFile(new File(dirTestWatcher.getRootDir(), fileName),
- "{\"sample\": {}, \"a\": \"a\"}", Charset.defaultCharset());
+ // Create a test JSON object that includes two nested empty objects. The
+ // first has a string property "b" as a sibling, so its parent,
+ // non_empty_child, should be kept in the output schema with only "b" left.
+ // The second has no siblings, so its parent, empty_child, is left without
+ // children and should be eliminated from the output schema altogether.
+ FileUtils.writeStringToFile(
+ new File(dirTestWatcher.getRootDir(), fileName),
+ "{\"non_empty_child\": { \"empty\": {}, \"b\": \"b\"}, \"empty_child\": { \"empty\": {} }, \"a\": \"a\"}",
+ Charset.defaultCharset()
+ );
run("create table dfs.tmp.t1 as SELECT * from dfs.`%s` t", fileName);
testBuilder()
.sqlQuery("select * from dfs.tmp.t1")
.unOrdered()
- .baselineColumns("a")
- .baselineValues("a")
+ // Expect only "a" and non_empty_child (reduced to its "b" entry); no
+ // empty_child column should be present in the written table.
+ .baselineColumns("a", "non_empty_child")
+ .baselineValues("a", mapOf("b", "b"))
.go();
}