DRILL-8453: Add XSD Support to XML Reader (Part 1) (#2824)

---------

Co-authored-by: Michael Beckerle <mbeckerle@apache.org>
diff --git a/contrib/format-xml/pom.xml b/contrib/format-xml/pom.xml
index d32c38d..6c253df 100644
--- a/contrib/format-xml/pom.xml
+++ b/contrib/format-xml/pom.xml
@@ -18,7 +18,8 @@
     limitations under the License.
 
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
 
   <parent>
@@ -36,7 +37,16 @@
       <artifactId>drill-java-exec</artifactId>
       <version>${project.version}</version>
     </dependency>
-
+    <dependency>
+      <groupId>org.apache.ws.xmlschema</groupId>
+      <artifactId>xmlschema-core</artifactId>
+      <version>2.3.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.ws.xmlschema</groupId>
+      <artifactId>xmlschema-walker</artifactId>
+      <version>2.3.0</version>
+    </dependency>
     <!-- Test dependencies -->
     <dependency>
       <groupId>org.apache.drill.exec</groupId>
@@ -81,4 +91,4 @@
       </plugin>
     </plugins>
   </build>
-</project>
\ No newline at end of file
+</project>
diff --git a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
index fac7b11..8d1fb59 100644
--- a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
+++ b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
@@ -59,7 +59,7 @@
 
 public class XMLReader implements Closeable {
   private static final Logger logger = LoggerFactory.getLogger(XMLReader.class);
-  private static final String ATTRIBUTE_MAP_NAME = "attributes";
+  public static final String ATTRIBUTE_MAP_NAME = "attributes";
 
   private final Stack<String> fieldNameStack;
   private final Stack<TupleWriter> rowWriterStack;
diff --git a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/xsd/DrillXSDSchemaUtils.java b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/xsd/DrillXSDSchemaUtils.java
new file mode 100644
index 0000000..194f640
--- /dev/null
+++ b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/xsd/DrillXSDSchemaUtils.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.xml.xsd;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
+import org.apache.ws.commons.schema.XmlSchema;
+import org.apache.ws.commons.schema.XmlSchemaCollection;
+import org.apache.ws.commons.schema.XmlSchemaElement;
+
+import org.apache.ws.commons.schema.XmlSchemaObject;
+import org.apache.ws.commons.schema.walker.XmlSchemaWalker;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.xml.transform.stream.StreamSource;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+public class DrillXSDSchemaUtils {
+  private static final MinorType DEFAULT_TYPE = MinorType.VARCHAR;
+  private static final Logger logger = LoggerFactory.getLogger(DrillXSDSchemaUtils.class);
+
+  /**
+   * This map maps the data types defined by the XSD definition to Drill data types.
+   * NOTE(review): lookups use getBaseType().name(); confirm the BASE64BINARY/HEXBINARY keys match it.
+   */
+  public static final ImmutableMap<String, MinorType> XML_TYPE_MAPPINGS = ImmutableMap.<String, MinorType>builder()
+    .put("BASE64BINARY", MinorType.VARBINARY)
+    .put("BOOLEAN", MinorType.BIT)
+    .put("DATE", MinorType.DATE)
+    .put("DATETIME", MinorType.TIMESTAMP)
+    .put("DECIMAL", MinorType.VARDECIMAL)
+    .put("DOUBLE", MinorType.FLOAT8)
+    .put("DURATION", MinorType.INTERVAL)
+    .put("FLOAT", MinorType.FLOAT4)
+    .put("HEXBINARY", MinorType.VARBINARY)
+    .put("STRING", MinorType.VARCHAR)
+    .put("TIME", MinorType.TIME)
+    .build();
+
+  /**
+   * Test-only overload that reads the XSD from a file path; the file is closed before returning.
+   * @param filename A {@link String} containing the path of an XSD file.
+   * @return A {@link TupleMetadata} containing a Drill representation of the XSD schema.
+   * @throws IOException If the file cannot be opened or closed.
+   */
+  @VisibleForTesting
+  public static TupleMetadata getSchema(String filename) throws IOException {
+    try (InputStream inputStream = Files.newInputStream(Paths.get(filename))) {
+      return processSchema(inputStream);
+    }
+  }
+
+  /**
+   * Returns a {@link TupleMetadata} of the schema from an XSD file from an InputStream.
+   * This method does not itself close the stream; the caller keeps ownership of it.
+   * @param inputStream A {@link InputStream} containing an XSD file.
+   * @return A {@link TupleMetadata} of the schema from the XSD file.
+   */
+  public static TupleMetadata getSchema(InputStream inputStream) {
+    return processSchema(inputStream);
+  }
+
+  /** Reads the XSD into a schema collection and walks its element declarations with the Drill visitor. */
+  private static TupleMetadata processSchema(InputStream inputStream) {
+    XmlSchemaCollection schemaCollection = new XmlSchemaCollection();
+    schemaCollection.read(new StreamSource(inputStream));
+
+    DrillXSDSchemaVisitor schemaVisitor = new DrillXSDSchemaVisitor(new SchemaBuilder());
+    XmlSchemaWalker walker = new XmlSchemaWalker(schemaCollection, schemaVisitor);
+
+    // Walk all the schemata; only items that are element declarations are handed to the walker.
+    for (XmlSchema schema : schemaCollection.getXmlSchemas()) {
+      for (XmlSchemaObject schemaObject : schema.getItems()) {
+        if (schemaObject instanceof XmlSchemaElement) {
+          walker.walk((XmlSchemaElement) schemaObject);
+        }
+      }
+    }
+    return schemaVisitor.getDrillSchema();
+  }
+
+  /**
+   * Returns a {@link MinorType} of the corresponding XML Data Type.  Defaults to VARCHAR, with a
+   * logged warning, if the type is unknown or null.
+   * @param xmlType A String of the XML Data Type
+   * @return A {@link MinorType} of the Drill data type.
+   */
+  public static MinorType getDrillDataType(String xmlType) {
+    // A missing key yields null rather than an exception, so test the result instead of catching NPE.
+    MinorType type = xmlType == null ? null : XML_TYPE_MAPPINGS.get(xmlType);
+    if (type == null) {
+      logger.warn("Unknown data type found in XSD reader: {}.  Returning VARCHAR.", xmlType);
+      return DEFAULT_TYPE;
+    }
+    return type;
+  }
+}
diff --git a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/xsd/DrillXSDSchemaVisitor.java b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/xsd/DrillXSDSchemaVisitor.java
new file mode 100644
index 0000000..83fcf27
--- /dev/null
+++ b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/xsd/DrillXSDSchemaVisitor.java
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.xml.xsd;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.metadata.MapBuilder;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.ws.commons.schema.XmlSchemaAll;
+import org.apache.ws.commons.schema.XmlSchemaAny;
+import org.apache.ws.commons.schema.XmlSchemaAnyAttribute;
+import org.apache.ws.commons.schema.XmlSchemaChoice;
+import org.apache.ws.commons.schema.XmlSchemaElement;
+import org.apache.ws.commons.schema.XmlSchemaSequence;
+import org.apache.ws.commons.schema.walker.XmlSchemaAttrInfo;
+import org.apache.ws.commons.schema.walker.XmlSchemaTypeInfo;
+import org.apache.ws.commons.schema.walker.XmlSchemaVisitor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+
+import static org.apache.drill.exec.store.xml.XMLReader.ATTRIBUTE_MAP_NAME;
+
+
+/**
+ * This class transforms an XSD schema into a Drill Schema.
+ */
+public class DrillXSDSchemaVisitor implements XmlSchemaVisitor {
+  private static final Logger logger = LoggerFactory.getLogger(DrillXSDSchemaVisitor.class);
+  private SchemaBuilder builder;
+  private MapBuilder currentMapBuilder;
+  private int nestingLevel;
+
+  /**
+   * Table to hold attribute info as it is traversed. We construct the
+   * attributes map for all the attributes when the walker tells us we're
+   * at the end of all the element decl's attributes.
+   * <p>
+   * Uses {@link LinkedHashMap} to ensure deterministic behavior which facilitates testability.
+   * In this situation it probably does not matter, but it's a good practice.
+   */
+  private final HashMap<XmlSchemaElement, List<XmlSchemaAttrInfo>> attributeInfoTable =
+      new LinkedHashMap<>();
+
+  public DrillXSDSchemaVisitor(SchemaBuilder builder) {
+    this.builder = builder;
+    this.nestingLevel = 0;
+  }
+
+  /**
+   * Returns a {@link TupleMetadata} representation of the schema contained in an XSD file. This method should only
+   * be called after the walk method of XmlSchemaWalker has been called.
+   * @return A {@link TupleMetadata} representation of the XSD schema.
+   */
+  public TupleMetadata getDrillSchema() {
+    return builder.build();
+  }
+
+  /**
+   * Handles global elements establishing a map for the child elements and attributes (if any).
+   * <p>
+   * TBD: Does not handle case where multiple elements have the same name as in:
+   * <pre>{@code
+   * <element name="a" .../>
+   * <element name="b" .../>
+   * <element name="a" .../>
+   * }</pre>
+   * There is also the case where they are ambiguous unless namespaces are used:
+   * <pre>{@code
+   * <element name="a" .../>
+   * <element ref="pre:a" .../> <!-- without namespace, ambiguous with prior "a" -->
+   * }</pre>
+   */
+  @Override
+  public void onEnterElement(XmlSchemaElement xmlSchemaElement, XmlSchemaTypeInfo xmlSchemaTypeInfo, boolean b) {
+    assert nestingLevel >= 0;
+    boolean isRepeated = xmlSchemaElement.getMaxOccurs() > 1;
+    String fieldName = xmlSchemaElement.getName();
+    //
+    // Note that the child name in constant ATTRIBUTE_MAP_NAME is reserved and cannot be used
+    // by any child element.
+    // TODO: There are many other things we want to refuse. E.g., if there are mixed content elements.
+    //
+    if (StringUtils.equals(ATTRIBUTE_MAP_NAME, fieldName)) {
+      throw UserException.dataReadError()
+          .message("XML schema contains a field named " + ATTRIBUTE_MAP_NAME + " which is a " +
+              "reserved word for XML schemata.")
+          .build(logger);
+    }
+
+    if (xmlSchemaTypeInfo.getType().name().equalsIgnoreCase("COMPLEX")) {
+      // Start a map here.
+      logger.debug("Starting map {}.", fieldName);
+
+      // There are two cases, if the element belongs to a complex object or not.  If it does not, the currentMapBuilder
+      // will be null. We therefore have to get a MapBuilder object from the SchemaBuilder and save it as the
+      // current MapBuilder.
+      //
+      // In either case, we also need to determine whether the element in question is an array or not.  If it is,
+      // we set the data mode to repeated.
+      if (currentMapBuilder == null) {
+        // global element declaration
+        assert nestingLevel == 0;
+        assert xmlSchemaElement.getMaxOccurs() == 1;
+        assert xmlSchemaElement.getMinOccurs() == 1;
+        currentMapBuilder = builder.addMap(fieldName);
+      } else {
+        // local element decl or element reference
+        // If the current schema element is repeated (IE an array) record it as such.
+        if (isRepeated) {
+          currentMapBuilder = currentMapBuilder.addMapArray(fieldName);
+        } else {
+          currentMapBuilder = currentMapBuilder.addMap(fieldName);
+        }
+      }
+      nestingLevel++;
+    } else {
+      // If the field is a simple type, simply add it to the schema.
+      MinorType dataType = DrillXSDSchemaUtils.getDrillDataType(xmlSchemaTypeInfo.getBaseType().name());
+      if (currentMapBuilder == null) {
+        // global element decl case
+        // Now, strictly speaking an XML document cannot just be a single simple type
+        // element, but for testing reasons, it is convenient to allow this.
+        // If the current map is null, it means we are not in a nested construct
+        assert nestingLevel == 0;
+        assert xmlSchemaElement.getMaxOccurs() == 1;
+        assert xmlSchemaElement.getMinOccurs() == 1;
+        builder.addNullable(fieldName, dataType);
+      } else {
+        // Otherwise, write to the current map builder
+        if (isRepeated) {
+          currentMapBuilder.add(fieldName, dataType, DataMode.REPEATED);
+          logger.debug("Adding array {}.", fieldName);
+        } else {
+          currentMapBuilder.addNullable(fieldName, dataType);
+          logger.debug("Adding field {}.", fieldName);
+        }
+      }
+      // For simple types, nestingLevel is not increased.
+    }
+  }
+
+  @Override
+  public void onExitElement(XmlSchemaElement xmlSchemaElement, XmlSchemaTypeInfo xmlSchemaTypeInfo, boolean b) {
+    assert nestingLevel >= 0;
+    if (xmlSchemaTypeInfo.getType().name().equalsIgnoreCase("COMPLEX")) {
+      assert nestingLevel >= 1;
+      // This section closes out a nested object. If the nesting level is greater than 1, we make a call to
+      // resumeMap which gets us the parent map.  If we are closing a root-level map (nesting level 1), we resume
+      // the schema builder and clear out the currentMapBuilder by setting it to null.
+      assert currentMapBuilder != null;
+      logger.debug("Ending map {}.", xmlSchemaElement.getName());
+      if (nestingLevel > 1) {
+        currentMapBuilder = currentMapBuilder.resumeMap();
+      } else {
+        builder = currentMapBuilder.resumeSchema();
+        currentMapBuilder = null;
+      }
+      nestingLevel--;
+    }
+  }
+
+  /**
+   * This method just gathers the attributes up into a table, keyed by their owning element.
+   * The attributes map itself is built later, in
+   * {@link #onEndAttributes(XmlSchemaElement, XmlSchemaTypeInfo)}.
+   */
+  @Override
+  public void onVisitAttribute(XmlSchemaElement xmlSchemaElement, XmlSchemaAttrInfo xmlSchemaAttrInfo) {
+    // computeIfAbsent registers the list on first use, so no separate put is required.
+    attributeInfoTable.computeIfAbsent(xmlSchemaElement, key -> new ArrayList<>()).add(xmlSchemaAttrInfo);
+  }
+
+  /**
+   * Called for each element decl once all its attributes have been previously
+   * processed by onVisitAttribute.
+   * <p>
+   * Constructs the map for the special attributes child element of each element.
+   * Note: does not construct an attribute child map if there are no attributes.
+   * <p>
+   * Only supports attributes with no-namespace on their qnames.
+   * Or rather, ignores namespaces. Only deals with local names.
+   * <p>
+   * TBD: needs to check for attributes with namespaced names
+   * and at minimum reject them.
+   */
+  @Override
+  public void onEndAttributes(XmlSchemaElement xmlSchemaElement, XmlSchemaTypeInfo xmlSchemaTypeInfo) {
+    // remove() both fetches the collected attributes and cleans up the table in one lookup.
+    List<XmlSchemaAttrInfo> attrs = attributeInfoTable.remove(xmlSchemaElement);
+    // the currentMapBuilder can be null for a global element decl of simple type.
+    if (attrs != null && currentMapBuilder != null) {
+      String elementName = xmlSchemaElement.getName();
+      logger.debug("Starting map {}/attributes.", elementName);
+      assert attrs.size() >= 1;
+      currentMapBuilder = currentMapBuilder.addMap(ATTRIBUTE_MAP_NAME);
+      for (XmlSchemaAttrInfo attr : attrs) {
+        String attrName = attr.getAttribute().getName();
+        MinorType dataType = DrillXSDSchemaUtils.getDrillDataType(attr.getType().getBaseType().name());
+        currentMapBuilder = currentMapBuilder.addNullable(attrName, dataType);
+        logger.debug("Adding attribute {}.", attrName);
+      }
+      logger.debug("Ending map {}/attributes.", elementName);
+      currentMapBuilder = currentMapBuilder.resumeMap();
+    }
+  }
+
+  @Override
+  public void onEnterSubstitutionGroup(XmlSchemaElement xmlSchemaElement) {
+    // no op
+  }
+
+  @Override
+  public void onExitSubstitutionGroup(XmlSchemaElement xmlSchemaElement) {
+    // no op
+  }
+
+  @Override
+  public void onEnterAllGroup(XmlSchemaAll xmlSchemaAll) {
+    // no op
+  }
+
+  @Override
+  public void onExitAllGroup(XmlSchemaAll xmlSchemaAll) {
+    // no op
+  }
+
+  @Override
+  public void onEnterChoiceGroup(XmlSchemaChoice xmlSchemaChoice) {
+    // no op
+  }
+
+  @Override
+  public void onExitChoiceGroup(XmlSchemaChoice xmlSchemaChoice) {
+    // no op
+  }
+
+  @Override
+  public void onEnterSequenceGroup(XmlSchemaSequence xmlSchemaSequence) {
+    // no op
+  }
+
+  @Override
+  public void onExitSequenceGroup(XmlSchemaSequence xmlSchemaSequence) {
+    // no op
+  }
+
+  @Override
+  public void onVisitAny(XmlSchemaAny xmlSchemaAny) {
+    // no op
+  }
+
+  @Override
+  public void onVisitAnyAttribute(XmlSchemaElement xmlSchemaElement, XmlSchemaAnyAttribute xmlSchemaAnyAttribute) {
+    // no op
+  }
+}
diff --git a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/xsd/TestXSDSchema.java b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/xsd/TestXSDSchema.java
new file mode 100644
index 0000000..58868f1
--- /dev/null
+++ b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/xsd/TestXSDSchema.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.xml.xsd;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.common.util.DrillFileUtils;
+import org.apache.drill.exec.record.metadata.MapBuilder;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.junit.Test;
+
+import java.io.File;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TestXSDSchema {
+
+  // simple.xsd: one global element carrying an attribute, a nested map and a repeated map.
+  @Test
+  public void testSimpleXSD() throws Exception {
+    File simpleXsd = DrillFileUtils.getResourceAsFile("/xsd/simple.xsd");
+    TupleMetadata actual = DrillXSDSchemaUtils.getSchema(simpleXsd.getPath());
+
+    TupleMetadata expected = new SchemaBuilder()
+        .addMap("shiporder")
+          .addMap("attributes")
+            .addNullable("orderid", MinorType.VARCHAR)
+          .resumeMap()
+          .addNullable("orderperson", MinorType.VARCHAR)
+          .addMap("shipto")
+            .addNullable("name", MinorType.VARCHAR)
+            .addNullable("address", MinorType.VARCHAR)
+            .addNullable("city", MinorType.VARCHAR)
+            .addNullable("country", MinorType.VARCHAR)
+          .resumeMap()
+          .addMapArray("item")
+            .addNullable("title", MinorType.VARCHAR)
+            .addNullable("note", MinorType.VARCHAR)
+            .addNullable("quantity", MinorType.VARDECIMAL)
+            .addNullable("price", MinorType.VARDECIMAL)
+          .resumeMap()
+        .resumeSchema()
+        .buildSchema();
+    assertTrue(expected.isEquivalent(actual));
+  }
+
+  // complex.xsd: a global simple-type element, an attribute-only element and a nested purchase order.
+  @Test
+  public void testComplexXSD() throws Exception {
+    File complexXsd = DrillFileUtils.getResourceAsFile("/xsd/complex.xsd");
+    TupleMetadata actual = DrillXSDSchemaUtils.getSchema(complexXsd.getPath());
+
+    MapBuilder afterShipTo = new SchemaBuilder()
+        .addNullable("comment", MinorType.VARCHAR) // global comment element
+        .addMap("infoType")
+          .addMap("attributes")
+            .addNullable("kind", MinorType.VARCHAR)
+          .resumeMap()
+        .resumeSchema()
+        .addMap("purchaseOrder") // global purchaseOrder element
+          .addMap("attributes")
+            .addNullable("orderDate", MinorType.DATE) // an attribute
+            .addNullable("confirmDate", MinorType.DATE) // an attribute
+          .resumeMap()
+          .addMap("shipTo")
+            .addMap("attributes")
+              .addNullable("country", MinorType.VARCHAR) // an attribute
+            .resumeMap()
+            .addNullable("name", MinorType.VARCHAR)
+            .addNullable("street", MinorType.VARCHAR)
+            .addNullable("city", MinorType.VARCHAR)
+            .addNullable("state", MinorType.VARCHAR)
+            .addNullable("zip", MinorType.VARDECIMAL)
+          .resumeMap(); // end shipTo
+    MapBuilder afterBillTo = afterShipTo
+          .addMap("billTo")
+            .addMap("attributes")
+              .addNullable("country", MinorType.VARCHAR) // an attribute
+            .resumeMap()
+            .addNullable("name", MinorType.VARCHAR)
+            .addNullable("street", MinorType.VARCHAR)
+            .addNullable("city", MinorType.VARCHAR)
+            .addNullable("state", MinorType.VARCHAR)
+            .addNullable("zip", MinorType.VARDECIMAL)
+          .resumeMap(); // end billTo
+    MapBuilder afterItems = afterBillTo
+          .addNullable("comment", MinorType.VARCHAR)
+          .addMap("items")
+            .addMapArray("item")
+              .addMap("attributes")
+                .addNullable("partNum", MinorType.VARCHAR) // an attribute
+              .resumeMap()
+              .addNullable("productName", MinorType.VARCHAR)
+              .addNullable("quantity", MinorType.VARDECIMAL)
+              .addNullable("USPrice", MinorType.VARDECIMAL)
+              .addNullable("comment", MinorType.VARCHAR)
+              .addNullable("shipDate", MinorType.DATE)
+            .resumeMap() // end item
+          .resumeMap(); // end items
+
+    TupleMetadata expected = afterItems.resumeSchema().build();
+    assertTrue(expected.isEquivalent(actual));
+  }
+}
diff --git a/contrib/format-xml/src/test/resources/xsd/complex.xsd b/contrib/format-xml/src/test/resources/xsd/complex.xsd
new file mode 100644
index 0000000..f15b9ef
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xsd/complex.xsd
@@ -0,0 +1,90 @@
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+            xmlns:tns="http://tempuri.org/PurchaseOrderSchema.xsd"
+            targetNamespace="http://tempuri.org/PurchaseOrderSchema.xsd"
+            elementFormDefault="qualified">
+
+  <!-- example of global simple type element -->
+    <xsd:element name='comment' type='xsd:string'/>
+
+  <!-- example of complex type with ONLY attributes -->
+    <xsd:element name='infoType'>
+      <xsd:complexType>
+        <xsd:attribute name="kind">
+          <xsd:simpleType>
+            <xsd:restriction base="xsd:token">
+              <xsd:enumeration value="byte"/>
+              <xsd:enumeration value="bit"/>
+            </xsd:restriction>
+          </xsd:simpleType>
+        </xsd:attribute>
+      </xsd:complexType>
+    </xsd:element>
+
+    <xsd:element name='purchaseOrder' type='tns:PurchaseOrderType'/>
+
+    <!-- global attribute and attributeGroups are ignored unless referenced from within
+     an element declaration -->
+
+    <xsd:attribute name="aGlobalAttribute" type="xsd:string"/>
+
+    <xsd:complexType name='USAddress'>
+        <xsd:annotation>
+            <xsd:documentation>
+                Purchase order schema for Example.Microsoft.com.
+            </xsd:documentation>
+        </xsd:annotation>
+        <xsd:sequence>
+            <xsd:element name='name'   type='xsd:string'/>
+            <xsd:element name='street' type='xsd:string'/>
+            <xsd:element name='city'   type='xsd:string'/>
+            <xsd:element name='state'  type='xsd:string'/>
+            <xsd:element name='zip'    type='xsd:decimal'/>
+        </xsd:sequence>
+        <xsd:attribute name='country' type='xsd:NMTOKEN' fixed='US'/>
+    </xsd:complexType>
+
+    <xsd:simpleType name='SKU'>
+        <xsd:restriction base='xsd:string'>
+            <xsd:pattern value='\d{3}\w{3}'/>
+        </xsd:restriction>
+    </xsd:simpleType>
+
+    <xsd:complexType name='Items'>
+        <xsd:sequence>
+            <xsd:element name='item' minOccurs='0' maxOccurs='unbounded'>
+                <xsd:complexType>
+                    <xsd:sequence>
+                        <xsd:element name='productName' type='xsd:string'/>
+                        <xsd:element name='quantity'>
+                            <xsd:simpleType>
+                                <xsd:restriction base='xsd:positiveInteger'>
+                                    <xsd:minInclusive value='1'/>
+                                    <xsd:maxExclusive value='100'/>
+                                </xsd:restriction>
+                            </xsd:simpleType>
+                        </xsd:element>
+                        <xsd:element name='USPrice'  type='xsd:decimal'/>
+                        <xsd:element ref='tns:comment'/>
+                        <xsd:element name='shipDate' type='xsd:date' minOccurs='0'/>
+                    </xsd:sequence>
+                    <xsd:attribute name='partNum' type='tns:SKU'/>
+                </xsd:complexType>
+            </xsd:element>
+        </xsd:sequence>
+    </xsd:complexType>
+
+    <xsd:complexType name='PurchaseOrderType'>
+        <xsd:sequence>
+            <xsd:element name='shipTo' type='tns:USAddress'/>
+            <xsd:element name='billTo' type='tns:USAddress'/>
+            <xsd:element ref='tns:comment' minOccurs='0'/>
+            <xsd:element name='items'  type='tns:Items'/>
+        </xsd:sequence>
+        <xsd:attributeGroup ref="tns:dates"/>
+    </xsd:complexType>
+
+  <xsd:attributeGroup name="dates">
+    <xsd:attribute name='orderDate' type='xsd:date'/>
+    <xsd:attribute name='confirmDate' type='xsd:date' use='required'/>
+  </xsd:attributeGroup>
+</xsd:schema>
diff --git a/contrib/format-xml/src/test/resources/xsd/simple.xsd b/contrib/format-xml/src/test/resources/xsd/simple.xsd
new file mode 100644
index 0000000..3eb6e76
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xsd/simple.xsd
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+
+  <xs:element name="shiporder">
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element name="orderperson" type="xs:string"/>
+        <xs:element name="shipto">
+          <xs:complexType>
+            <xs:sequence>
+              <xs:element name="name" type="xs:string"/>
+              <xs:element name="address" type="xs:string"/>
+              <xs:element name="city" type="xs:string"/>
+              <xs:element name="country" type="xs:string"/>
+            </xs:sequence>
+          </xs:complexType>
+        </xs:element>
+        <xs:element name="item" maxOccurs="unbounded">
+          <xs:complexType>
+            <xs:sequence>
+              <xs:element name="title" type="xs:string"/>
+              <xs:element name="note" type="xs:string" minOccurs="0"/>
+              <xs:element name="quantity" type="xs:positiveInteger"/>
+              <xs:element name="price" type="xs:decimal"/>
+            </xs:sequence>
+          </xs:complexType>
+        </xs:element>
+      </xs:sequence>
+      <xs:attribute name="orderid" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+</xs:schema>
diff --git a/exec/jdbc-all/pom.xml b/exec/jdbc-all/pom.xml
index 4ea4f06..64a2196 100644
--- a/exec/jdbc-all/pom.xml
+++ b/exec/jdbc-all/pom.xml
@@ -33,7 +33,7 @@
        "package.namespace.prefix" equals to "oadd.". It can be overridden if necessary within any profile -->
   <properties>
     <package.namespace.prefix>oadd.</package.namespace.prefix>
-    <jdbc-all-jar.maxsize>40500000</jdbc-all-jar.maxsize>
+    <jdbc-all-jar.maxsize>44500000</jdbc-all-jar.maxsize>
   </properties>
 
   <dependencies>
@@ -231,6 +231,14 @@
           <artifactId>commons-compress</artifactId>
         </exclusion>
         <exclusion>
+          <groupId>org.apache.ws.xmlschema</groupId>
+          <artifactId>xmlschema-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.ws.xmlschema</groupId>
+          <artifactId>xmlschema-walker</artifactId>
+        </exclusion>
+        <exclusion>
           <groupId>io.airlift</groupId>
           <artifactId>aircompressor</artifactId>
         </exclusion>
diff --git a/pom.xml b/pom.xml
index 418885d..8baa6f7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -366,6 +366,7 @@
             <exclude>**/*.sqllog</exclude>
             <exclude>**/*.sqllog2</exclude>
             <exclude>**/*.syslog</exclude>
+            <exclude>**/*.xsd</exclude>
             <exclude>**/*.xls</exclude>
             <exclude>**/*.xlsx</exclude>
             <exclude>**/*.syslog1</exclude>
@@ -686,7 +687,7 @@
             <!--suppress UnresolvedMavenProperty -->
             <header>${maven.multiModuleProjectDirectory}/header</header>
             <excludes>
-              <exclude>**/*.accdb</exclude>
+              <exclude>**/*.accdb</exclude>
               <exclude>**/*.access_log</exclude>
               <exclude>**/.asf.yaml</exclude>
               <exclude>**/*.autotools</exclude>
@@ -777,6 +778,7 @@
               <exclude>**/*.woff2</exclude>
               <exclude>**/*.xls</exclude>
               <exclude>**/*.xlsx</exclude>
+              <exclude>**/*.xsd</exclude>
             </excludes>
             <mapping>
               <boost>SLASHSTAR_STYLE</boost>