DRILL-8141: Ability to query XML root attributes (#2884)
diff --git a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
index dbb8f45..1d85851 100644
--- a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
+++ b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
@@ -279,6 +279,13 @@
if (!rowStarted) {
currentTupleWriter = startRow(rootRowWriter);
+
+ Iterator<Attribute> attributes = startElement.getAttributes();
+ if (attributes != null && attributes.hasNext()) {
+ // This would be the root element, so the attribute prefix would simply be the field name.
+ writeAttributes(fieldName, attributes);
+ }
+
} else {
if (lastEvent != null &&
lastEvent.getEventType() == XMLStreamConstants.START_ELEMENT) {
@@ -333,6 +340,7 @@
// Get the field value
fieldValue = currentEvent.asCharacters().getData().trim();
+ // Switch to GETTING_DATA exactly once, now that the trimmed text has been captured.
changeState(xmlState.GETTING_DATA);
break;
case XMLStreamConstants.END_ELEMENT:
diff --git a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
index ca3e4bd..1283337 100644
--- a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
+++ b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
@@ -90,6 +90,34 @@
}
@Test
+ public void testAttributesOnRoot() throws Exception {
+ // The root element of no_nest.xml carries attributes; they are expected in the attributes map.
+ RowSet actual = client.queryBuilder()
+ .sql("SELECT * FROM table(cp.`xml/no_nest.xml` (type => 'xml', dataLevel => 1))")
+ .rowSet();
+ assertEquals(1, actual.rowCount());
+ TupleMetadata schema = new SchemaBuilder()
+ .addMap("attributes")
+ .addNullable("PPP_Version", MinorType.VARCHAR)
+ .addNullable("PPP_TimeStamp", MinorType.VARCHAR)
+ .addNullable("P1_SubVersion", MinorType.VARCHAR)
+ .addNullable("P1_MID", MinorType.VARCHAR)
+ .addNullable("P1_PN", MinorType.VARCHAR)
+ .addNullable("P1_SL", MinorType.VARCHAR)
+ .addNullable("P2_SubVersion", MinorType.VARCHAR)
+ .resumeSchema()
+ .addNullable("P1", MinorType.VARCHAR)
+ .addMap("P2")
+ .addNullable("Color", MinorType.VARCHAR)
+ .resumeSchema()
+ .buildSchema();
+ RowSet wanted = client.rowSetBuilder(schema)
+ .addRow(strArray("2023-001", "2023-06-09T21:17:14.416+02:00", "a1", "XX003", "156", "3", "b1"), null, strArray("blue"))
+ .build();
+ new RowSetComparison(wanted).verifyAndClearAll(actual);
+ }
+
+ @Test
public void testXXE() throws Exception {
String sql = "SELECT * FROM cp.`xml/bad.xml`";
try {
@@ -178,9 +206,14 @@
String sql = "SELECT * FROM cp.`xml/weather.xml`";
RowSet results = client.queryBuilder().sql(sql).rowSet();
assertEquals(1, results.rowCount());
-
TupleMetadata expectedSchema = new SchemaBuilder()
.addMap("attributes")
+ .addNullable("weather_module_id", MinorType.VARCHAR)
+ .addNullable("weather_tab_id", MinorType.VARCHAR)
+ .addNullable("weather_mobile_row", MinorType.VARCHAR)
+ .addNullable("weather_mobile_zipped", MinorType.VARCHAR)
+ .addNullable("weather_row", MinorType.VARCHAR)
+ .addNullable("weather_section", MinorType.VARCHAR)
.addNullable("forecast_information_city_data", MinorType.VARCHAR)
.addNullable("forecast_information_postal_code_data", MinorType.VARCHAR)
.addNullable("forecast_information_latitude_e6_data", MinorType.VARCHAR)
@@ -211,7 +244,8 @@
.build();
RowSet expected = client.rowSetBuilder(expectedSchema)
- .addRow(strArray("Seattle, WA", "Seattle WA", "", "", "2011-09-29", "2011-09-29 17:53:00 +0000", "US", "Clear", "62", "17", "Humidity: 62%", "/ig/images/weather" +
+ .addRow(strArray("0", "0", "0", "1", "0", "0","Seattle, WA", "Seattle WA", "", "", "2011-09-29", "2011-09-29 17:53:00 +0000", "US", "Clear",
+ "62", "17", "Humidity: 62%", "/ig/images/weather" +
"/sunny.gif", "Wind: N at 4 mph"), null, null, null, null, null, null, null, null, null, null, null, null, null)
.build();
diff --git a/contrib/format-xml/src/test/resources/xml/no_nest.xml b/contrib/format-xml/src/test/resources/xml/no_nest.xml
new file mode 100644
index 0000000..e38dccb
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/no_nest.xml
@@ -0,0 +1,23 @@
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+<PPP Version="2023-001" TimeStamp="2023-06-09T21:17:14.416+02:00">
+ <P1 SubVersion="a1" MID="XX003" PN="156" SL="3"/>
+ <P2 SubVersion="b1"><Color>blue</Color></P2>
+</PPP>