Merge pull request #227 from mgov88/master

XSD validation for Lucene datastore
diff --git a/gora-lucene/src/main/java/org/apache/gora/lucene/store/LuceneStore.java b/gora-lucene/src/main/java/org/apache/gora/lucene/store/LuceneStore.java
index 4e749fb..73fb340 100644
--- a/gora-lucene/src/main/java/org/apache/gora/lucene/store/LuceneStore.java
+++ b/gora-lucene/src/main/java/org/apache/gora/lucene/store/LuceneStore.java
@@ -26,8 +26,11 @@
 import java.util.Locale;
 import java.util.Properties;
 import java.util.Set;
+import javax.xml.XMLConstants;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.stream.StreamSource;
+import javax.xml.validation.SchemaFactory;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Type;
 import org.apache.avro.specific.SpecificDatumReader;
@@ -86,6 +89,8 @@
   private static final Logger LOG = LoggerFactory.getLogger(LuceneStore.class);
 
   private static final String DEFAULT_MAPPING_FILE = "gora-lucene-mapping.xml";
+  private static final String XSD_MAPPING_FILE = "gora-lucene.xsd";
+  private static final String XSD_VALIDATION = "gora.xsd_validation";
   private static final String LUCENE_VERSION_KEY = "gora.lucene.index.version";
   private static final String DEFAULT_LUCENE_VERSION = "LATEST";
   private static final String LUCENE_RAM_BUFFER_KEY = "gora.lucene.index.writer.rambuffer";
@@ -123,7 +128,8 @@
     LOG.debug("Lucene index writer RAM buffer size: {}", ramBuffer);
 
     try {
-      mapping = readMapping(mappingFile);
+      String xsdval = properties.getProperty(XSD_VALIDATION, "false");
+      mapping = readMapping(mappingFile, Boolean.valueOf(xsdval));
     } catch (IOException ioe) {
       LOG.error(ioe.getMessage(), ioe);
       throw new GoraException(ioe);
@@ -148,11 +154,15 @@
     }
   }
 
-  private LuceneMapping readMapping(String filename) throws IOException {
+  private LuceneMapping readMapping(String filename, boolean xsdValidation) throws IOException {
     try {
 
       LuceneMapping mapping = new LuceneMapping();
-
+      if (xsdValidation) {
+        javax.xml.validation.Schema newSchema = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
+                .newSchema(new StreamSource(getClass().getClassLoader().getResourceAsStream(XSD_MAPPING_FILE)));
+        newSchema.newValidator().validate(new StreamSource(getClass().getClassLoader().getResourceAsStream(filename)));
+      }
       DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
       org.w3c.dom.Document dom = db.parse(getClass().getClassLoader().getResourceAsStream(filename));
       Element root = dom.getDocumentElement();
diff --git a/gora-lucene/src/main/resources/gora-lucene.xsd b/gora-lucene/src/main/resources/gora-lucene.xsd
new file mode 100644
index 0000000..62bfba2
--- /dev/null
+++ b/gora-lucene/src/main/resources/gora-lucene.xsd
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+  <xs:element name="gora-otd" type="lucene-mapping"/>
+  <!-- Root element gora-otd (declared above) holds one or more class mappings. -->
+  <xs:complexType name="lucene-mapping">
+    <xs:sequence>
+      <xs:element name="class" type="class-mapping" maxOccurs="unbounded"/>
+    </xs:sequence>
+  </xs:complexType>
+  <!-- A class mapping: one primarykey, then one or more fields; name and keyClass are mandatory. -->
+  <xs:complexType name="class-mapping">
+    <xs:sequence>
+      <xs:element name="primarykey" type="primarykey-mapping"/>
+      <xs:element name="field" type="field-mapping" maxOccurs="unbounded"/>
+    </xs:sequence>
+    <xs:attribute name="name" type="nameClass-types" use="required"/> 
+    <xs:attribute name="keyClass" type="keyClass-types" use="required"/>
+  </xs:complexType>
+  <!-- Primary key mapping: carries only the required column attribute. -->
+  <xs:complexType name="primarykey-mapping">
+    <xs:attribute name="column" type="xs:string" use="required"/> 
+  </xs:complexType>
+  <!-- Field mapping: required name and column attributes. -->
+  <xs:complexType name="field-mapping">
+    <xs:attribute name="name" type="fieldName-types" use="required"/> 
+    <xs:attribute name="column" type="xs:string" use="required"/> 
+  </xs:complexType>
+  <!-- Key classes accepted by this schema: java.lang.String and java.lang.Integer only. -->
+  <xs:simpleType name="keyClass-types">
+    <xs:restriction base="xs:string">
+      <xs:enumeration value="java.lang.String"/>
+      <xs:enumeration value="java.lang.Integer"/>
+    </xs:restriction>
+  </xs:simpleType>
+  <!-- Class name: dot-separated segments, each starting with a letter, '_' or '$'. -->
+  <xs:simpleType name="nameClass-types">
+    <xs:restriction base="xs:string">
+      <xs:pattern value="([\p{L}_$][\p{L}\p{N}_$]*\.)*[\p{L}_$][\p{L}\p{N}_$]*"/>
+    </xs:restriction>
+  </xs:simpleType>
+  <!-- Field name: an ASCII letter followed by ASCII letters or digits. -->
+  <xs:simpleType name="fieldName-types">
+    <xs:restriction base="xs:string">
+      <xs:pattern value="[a-zA-Z][a-zA-Z0-9]*"/>
+    </xs:restriction>
+  </xs:simpleType>
+    
+</xs:schema> 
diff --git a/gora-lucene/src/test/conf/gora-lucene-mapping-bad.xml b/gora-lucene/src/test/conf/gora-lucene-mapping-bad.xml
new file mode 100644
index 0000000..2d2d1c7
--- /dev/null
+++ b/gora-lucene/src/test/conf/gora-lucene-mapping-bad.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<gora-otd xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+          xsi:noNamespaceSchemaLocation="gora-lucene.xsd">
+
+  <class name="org.apache.gora.examples.generated.EmployeeInt" keyClass="java.lang.Integer">
+    <!--
+      The primarykey element below is deliberately commented out so that XSD validation fails.
+      <primarykey column="ssn"/>
+    -->
+    <field Name="ssn" columns="ssn"/>
+  </class>
+  
+</gora-otd>
+
diff --git a/gora-lucene/src/test/conf/gora-lucene-mapping.xml b/gora-lucene/src/test/conf/gora-lucene-mapping.xml
index c723966..c69bdd2 100644
--- a/gora-lucene/src/test/conf/gora-lucene-mapping.xml
+++ b/gora-lucene/src/test/conf/gora-lucene-mapping.xml
@@ -16,7 +16,8 @@
    limitations under the License.
 -->
 
-<gora-otd>
+<gora-otd xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+          xsi:noNamespaceSchemaLocation="gora-lucene.xsd">
 
   <class name="org.apache.gora.examples.generated.EmployeeInt" keyClass="java.lang.Integer">
     <primarykey column="ssn"/>
diff --git a/gora-lucene/src/test/java/org/apache/gora/lucene/store/TestLuceneStore.java b/gora-lucene/src/test/java/org/apache/gora/lucene/store/TestLuceneStore.java
index 9d71238..ece1e31 100644
--- a/gora-lucene/src/test/java/org/apache/gora/lucene/store/TestLuceneStore.java
+++ b/gora-lucene/src/test/java/org/apache/gora/lucene/store/TestLuceneStore.java
@@ -17,6 +17,7 @@
  */
 package org.apache.gora.lucene.store;
 
+import java.util.Properties;
 import org.apache.gora.examples.WebPageDataCreator;
 import static org.apache.gora.examples.WebPageDataCreator.SORTED_URLS;
 import static org.apache.gora.examples.WebPageDataCreator.URLS;
@@ -25,6 +26,7 @@
 import org.apache.gora.query.Query;
 import org.apache.gora.query.Result;
 import org.apache.gora.store.DataStore;
+import org.apache.gora.store.DataStoreFactory;
 import org.apache.gora.store.DataStoreTestBase;
 import static org.apache.gora.store.DataStoreTestBase.log;
 import org.apache.gora.store.DataStoreTestUtil;
@@ -52,7 +54,7 @@
     setTestDriver(new TestLuceneStoreDriver());
     DataStoreTestBase.setUpClass();
   }
-
+  
   @Test(expected = AssertionError.class)
   public void testSchemaExists() throws Exception {
     super.testSchemaExists();
@@ -168,5 +170,18 @@
       }
     }
   }
+  
+  /**
+   * XSD validation test.
+   *
+   * Enables XSD validation, points the store at a malformed mapping file and expects a GoraException.
+   */
+  @Test(expected = GoraException.class)
+  public void testXSDValidation() throws Exception {
+    Properties properties = new Properties();
+    properties.setProperty("gora.xsd_validation", "true");
+    properties.setProperty("gora.lucenestore.mapping.file", "gora-lucene-mapping-bad.xml");
+    DataStoreTestBase.testDriver.createDataStore(String.class, EmployeeInt.class, properties);
+  }
 
 }