VXQUERY-32 Integrate Apache Lucene. Added two functions to VXQuery: build-index-on-collection creates a Lucene index for a collection; collection-from-index queries a Lucene index to return a collection. Added tests.

commit: 1f623b1664fb5a86dae8b6be2497d822f80505d9 [log] [tgz]
author: Steven Glenn Jacobs <sjaco002@ucr.edu> Fri May 27 12:42:47 2016 -0700
committer: Steven Glenn Jacobs <sjaco002@ucr.edu> Fri May 27 12:42:47 2016 -0700
tree: e4bc56d292bafa8fd0012359c9bb44aa53b9d31d
parent: 2b59326602b3f7d7262f3241971b786dde152241 [diff]
diff --git a/vxquery-core/pom.xml b/vxquery-core/pom.xml
index d8f5f53..56eb45f 100644
--- a/vxquery-core/pom.xml
+++ b/vxquery-core/pom.xml

@@ -266,7 +266,36 @@
             <artifactId>junit</artifactId>
             <scope>test</scope>
         </dependency>
-
+        <dependency>
+            <artifactId>lucene-core</artifactId>
+            <groupId>org.apache.lucene</groupId>
+            <type>jar</type>
+            <version>5.5.1</version>
+        </dependency>
+        <dependency>
+            <artifactId>lucene-queryparser</artifactId>
+            <groupId>org.apache.lucene</groupId>
+            <type>jar</type>
+            <version>5.5.1</version>
+        </dependency>
+        <dependency>
+            <artifactId>lucene-analyzers-common</artifactId>
+            <groupId>org.apache.lucene</groupId>
+            <type>jar</type>
+            <version>5.5.1</version>
+        </dependency>
+        <dependency>
+            <artifactId>lucene-demo</artifactId>
+            <groupId>org.apache.lucene</groupId>
+            <type>jar</type>
+            <version>5.5.1</version>
+        </dependency>
+        <dependency>
+            <artifactId>lucene-backward-codecs</artifactId>
+            <groupId>org.apache.lucene</groupId>
+            <type>jar</type>
+            <version>5.5.1</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.hadoop</groupId>
             <artifactId>hadoop-mapreduce-client-core</artifactId>

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
index 3b9371d..8379ccf 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
+++ b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml

@@ -128,6 +128,28 @@
         <!-- Collection operator is added during the rewrite rules phase.  -->
     </function>
     
+    <!-- fn:build-index-on-collection($arg  as xs:string?, $indexFolder  as xs:string?) as  node()* -->
+    <function name="fn:build-index-on-collection">
+        <param name="collection-folder" type="xs:string?"/>
+        <param name="index-folder" type="xs:string?"/>
+        <return type="node()*"/> 
+        <runtime type="scalar" class="org.apache.vxquery.runtime.functions.index.IndexConstructorScalarEvaluatorFactory"/>
+    </function>
+    
+    <!-- fn:collection-from-index($indexfolder  as xs:string?, $elementpath as xs:string?) as  node()* -->
+    <function name="fn:collection-from-index">
+        <param name="index-folder" type="xs:string?"/>
+        <param name="element-path" type="xs:string?"/>
+        <return type="node()*"/> 
+         <runtime type="unnesting" class="org.apache.vxquery.runtime.functions.index.CollectionFromIndexUnnestingEvaluatorFactory"/>
+        <property type="DocumentOrder" class="org.apache.vxquery.compiler.rewriter.rules.propagationpolicies.InputPropertyPropagationPolicy">
+            <argument value="0"/>
+        </property>
+        <property type="UniqueNodes" class="org.apache.vxquery.compiler.rewriter.rules.propagationpolicies.InputPropertyPropagationPolicy">
+            <argument value="0"/>
+        </property>
+    </function>
+    
     <!-- fn:collection-with-tag($arg1  as xs:string?, $arg2 as xs:string?) as  node()* -->
     <function name="fn:collection-with-tag">
         <param name="arg1" type="xs:string?"/>

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/index/IndexAttributes.java b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexAttributes.java
new file mode 100644
index 0000000..cf8e3c0
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexAttributes.java

@@ -0,0 +1,287 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.index;
+
+import java.util.List;
+
+import org.xml.sax.Attributes;
+
+/**
+ * SAX {@link Attributes} implementation backed by six parallel lists; position {@code i} in every list
+ * describes attribute {@code i}.
+ * <p>
+ * The lists are stored as passed in (no copy is made) and the attribute count is the size of the name
+ * list at construction time. Index-based getters return {@code null} for an index that falls outside the
+ * backing list, as the {@link Attributes} contract requires, instead of throwing.
+ * </p>
+ */
+public class IndexAttributes implements Attributes {
+    int length;
+
+    List<String> names;
+    List<String> values;
+    List<String> uris;
+    List<String> localnames;
+    List<String> types;
+    List<String> qnames;
+
+    /**
+     * Creates the attribute view.
+     *
+     * @param n
+     *            attribute names; only the size of this list is used here, as the attribute count
+     * @param v
+     *            attribute values
+     * @param u
+     *            Namespace URIs
+     * @param l
+     *            local names
+     * @param t
+     *            attribute types
+     * @param q
+     *            qualified (prefixed) names
+     */
+    public IndexAttributes(List<String> n, List<String> v, List<String> u, List<String> l, List<String> t,
+            List<String> q) {
+        length = n.size();
+        names = n;
+        values = v;
+        uris = u;
+        localnames = l;
+        types = t;
+        qnames = q;
+    }
+
+    /**
+     * Return the number of attributes in the list.
+     * <p>
+     * Once you know the number of attributes, you can iterate through the list.
+     * </p>
+     *
+     * @return The number of attributes in the list.
+     * @see #getURI(int)
+     * @see #getLocalName(int)
+     * @see #getQName(int)
+     * @see #getType(int)
+     * @see #getValue(int)
+     */
+    @Override
+    public int getLength() {
+        return length;
+    }
+
+    /**
+     * Look up an attribute's Namespace URI by index.
+     *
+     * @param index
+     *            The attribute index (zero-based).
+     * @return The Namespace URI, or null if the index is out of range.
+     * @see #getLength
+     */
+    @Override
+    public String getURI(int index) {
+        return entryAt(uris, index);
+    }
+
+    /**
+     * Look up an attribute's local name by index.
+     *
+     * @param index
+     *            The attribute index (zero-based).
+     * @return The local name, or null if the index is out of range.
+     * @see #getLength
+     */
+    @Override
+    public String getLocalName(int index) {
+        return entryAt(localnames, index);
+    }
+
+    /**
+     * Look up an attribute's XML qualified (prefixed) name by index.
+     *
+     * @param index
+     *            The attribute index (zero-based).
+     * @return The XML qualified name, or null if the index is out of range.
+     * @see #getLength
+     */
+    @Override
+    public String getQName(int index) {
+        return entryAt(qnames, index);
+    }
+
+    /**
+     * Look up an attribute's type by index.
+     * <p>
+     * The attribute type is one of the strings "CDATA", "ID", "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS", "ENTITY",
+     * "ENTITIES", or "NOTATION" (always in upper case). This class returns whatever string was supplied in the
+     * type list.
+     * </p>
+     *
+     * @param index
+     *            The attribute index (zero-based).
+     * @return The attribute's type as a string, or null if the
+     *         index is out of range.
+     * @see #getLength
+     */
+    @Override
+    public String getType(int index) {
+        return entryAt(types, index);
+    }
+
+    /**
+     * Look up an attribute's value by index.
+     *
+     * @param index
+     *            The attribute index (zero-based).
+     * @return The attribute's value as a string, or null if the
+     *         index is out of range.
+     * @see #getLength
+     */
+    @Override
+    public String getValue(int index) {
+        return entryAt(values, index);
+    }
+
+    ////////////////////////////////////////////////////////////////////
+    // Name-based query.
+    ////////////////////////////////////////////////////////////////////
+
+    /**
+     * Look up the index of an attribute by Namespace name.
+     *
+     * @param uri
+     *            The Namespace URI, or the empty string if
+     *            the name has no Namespace URI.
+     * @param localName
+     *            The attribute's local name.
+     * @return The index of the attribute, or -1 if it does not
+     *         appear in the list.
+     */
+    @Override
+    public int getIndex(String uri, String localName) {
+        return indexOfNamespaceName(uri, localName);
+    }
+
+    /**
+     * Look up the index of an attribute by XML qualified (prefixed) name.
+     *
+     * @param qName
+     *            The qualified (prefixed) name.
+     * @return The index of the attribute, or -1 if it does not
+     *         appear in the list.
+     */
+    @Override
+    public int getIndex(String qName) {
+        return indexOfQualifiedName(qName);
+    }
+
+    /**
+     * Look up an attribute's type by Namespace name.
+     * <p>
+     * See {@link #getType(int) getType(int)} for a description of the possible types.
+     * </p>
+     *
+     * @param uri
+     *            The Namespace URI, or the empty String if the
+     *            name has no Namespace URI.
+     * @param localName
+     *            The local name of the attribute.
+     * @return The attribute type as a string, or null if the
+     *         attribute is not in the list.
+     */
+    @Override
+    public String getType(String uri, String localName) {
+        return entryAt(types, indexOfNamespaceName(uri, localName));
+    }
+
+    /**
+     * Look up an attribute's type by XML qualified (prefixed) name.
+     * <p>
+     * See {@link #getType(int) getType(int)} for a description of the possible types.
+     * </p>
+     *
+     * @param qName
+     *            The XML qualified name.
+     * @return The attribute type as a string, or null if the
+     *         attribute is not in the list.
+     */
+    @Override
+    public String getType(String qName) {
+        return entryAt(types, indexOfQualifiedName(qName));
+    }
+
+    /**
+     * Look up an attribute's value by Namespace name.
+     * <p>
+     * See {@link #getValue(int) getValue(int)} for a description of the possible values.
+     * </p>
+     *
+     * @param uri
+     *            The Namespace URI, or the empty String if the
+     *            name has no Namespace URI.
+     * @param localName
+     *            The local name of the attribute.
+     * @return The attribute value as a string, or null if the
+     *         attribute is not in the list.
+     */
+    @Override
+    public String getValue(String uri, String localName) {
+        return entryAt(values, indexOfNamespaceName(uri, localName));
+    }
+
+    /**
+     * Look up an attribute's value by XML qualified (prefixed) name.
+     * <p>
+     * See {@link #getValue(int) getValue(int)} for a description of the possible values.
+     * </p>
+     *
+     * @param qName
+     *            The XML qualified name.
+     * @return The attribute value as a string, or null if the
+     *         attribute is not in the list.
+     */
+    @Override
+    public String getValue(String qName) {
+        return entryAt(values, indexOfQualifiedName(qName));
+    }
+
+    /** Returns {@code list.get(index)}, or null when the index lies outside the list. */
+    private static String entryAt(List<String> list, int index) {
+        if (index < 0 || index >= list.size()) {
+            return null;
+        }
+        return list.get(index);
+    }
+
+    /** Returns the first position whose local name and Namespace URI both match, or -1 if none does. */
+    private int indexOfNamespaceName(String uri, String localName) {
+        for (int i = 0; i < length; i++) {
+            if (localnames.get(i).equals(localName) && uris.get(i).equals(uri)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /** Returns the first position whose qualified name matches, or -1 if none does. */
+    private int indexOfQualifiedName(String qName) {
+        for (int i = 0; i < length; i++) {
+            if (qnames.get(i).equals(qName)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+}

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java
new file mode 100644
index 0000000..2884097
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java

@@ -0,0 +1,882 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.index;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.BooleanPointable;
+import org.apache.hyracks.data.std.primitive.BytePointable;
+import org.apache.hyracks.data.std.primitive.DoublePointable;
+import org.apache.hyracks.data.std.primitive.FloatPointable;
+import org.apache.hyracks.data.std.primitive.IntegerPointable;
+import org.apache.hyracks.data.std.primitive.LongPointable;
+import org.apache.hyracks.data.std.primitive.ShortPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.primitive.VoidPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.vxquery.datamodel.accessors.PointablePool;
+import org.apache.vxquery.datamodel.accessors.PointablePoolFactory;
+import org.apache.vxquery.datamodel.accessors.SequencePointable;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.datamodel.accessors.atomic.CodedQNamePointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSBinaryPointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSDatePointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSDateTimePointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSDecimalPointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSDurationPointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSQNamePointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSTimePointable;
+import org.apache.vxquery.datamodel.accessors.nodes.AttributeNodePointable;
+import org.apache.vxquery.datamodel.accessors.nodes.DocumentNodePointable;
+import org.apache.vxquery.datamodel.accessors.nodes.ElementNodePointable;
+import org.apache.vxquery.datamodel.accessors.nodes.NodeTreePointable;
+import org.apache.vxquery.datamodel.accessors.nodes.TextOrCommentNodePointable;
+import org.apache.vxquery.datamodel.values.ValueTag;
+import org.apache.vxquery.runtime.functions.cast.CastToStringOperation;
+import org.apache.vxquery.serializer.XMLSerializer;
+
+public class IndexDocumentBuilder extends XMLSerializer {
+    // The value handed to the constructor; its bytes are what gets indexed.
+    private final IPointable treePointable;
+
+    // Pool from which the traversal methods take temporary pointables and to which they give them back.
+    private final PointablePool pp;
+    // Node tree currently being traversed; set by printNodeTree for the duration of the walk, null otherwise.
+    private NodeTreePointable ntp;
+
+    // Scratch buffer and its output view, reset before each castToString conversion writes into it.
+    private final ArrayBackedValueStorage abvs = new ArrayBackedValueStorage();
+    private final DataOutput dOut = abvs.getDataOutput();
+    private final CastToStringOperation castToString = new CastToStringOperation();
+    // Lucene document that printStart fills and hands to the writer.
+    private final Document doc;
+    // Index items collected by buildIndexItem, in the order they were built.
+    private final List<ComplexItem> results;
+
+    // Byte array, start offset and length of the value to index, captured in the constructor.
+    private final byte[] bstart;
+    private final int sstart;
+    private final int lstart;
+    // Lucene writer the finished document is added to.
+    private final IndexWriter writer;
+
+    /** Pairs one Lucene {@code StringField} with the id of the index element it was built from. */
+    class ComplexItem {
+        public final StringField sf;
+        public final String id;
+
+        public ComplexItem(StringField field, String elementId) {
+            this.sf = field;
+            this.id = elementId;
+        }
+    }
+
+    /**
+     * Captures the bytes of the value to index and prepares an empty Lucene document for it.
+     * <p>
+     * TODO: Handle Processing Instructions, PrefixedNames, and Namespace entries.
+     * </p>
+     *
+     * @param tree
+     *            pointable holding the tagged value to index
+     * @param inWriter
+     *            Lucene writer that {@link #printStart()} adds the finished document to
+     */
+    public IndexDocumentBuilder(IPointable tree, IndexWriter inWriter) {
+        this.treePointable = tree;
+        writer = inWriter;
+
+        //convert to tagged value pointable; use the pointable's own start offset so that a value
+        //which does not begin at position 0 of its backing array is still read from the right place
+        TaggedValuePointable tvp = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable();
+        tvp.set(treePointable.getByteArray(), treePointable.getStartOffset(), treePointable.getLength());
+
+        //get bytes and info from doc pointer
+        bstart = tvp.getByteArray();
+        sstart = tvp.getStartOffset();
+        lstart = tvp.getLength();
+
+        doc = new Document();
+
+        results = new ArrayList<ComplexItem>();
+
+        pp = PointablePoolFactory.INSTANCE.createPointablePool();
+    }
+
+    /**
+     * Entry point for indexing: walks the captured value with the traversal adapted from XMLSerializer,
+     * copies collected item fields into the Lucene document and adds that document to the writer.
+     *
+     * @throws IOException
+     *             if the traversal or the index writer fails
+     */
+    public void printStart() throws IOException {
+        print(bstart, sstart, lstart, "0", "");
+
+        //TODO: Since each doc is a file, we can only handle files small enough to fit in memory
+        //NOTE(review): the first and the last collected item are not added to the document - confirm intended
+        final int end = results.size() - 1;
+        for (int pos = 1; pos < end; pos++) {
+            ComplexItem item = results.get(pos);
+            doc.add(item.sf);
+        }
+        writer.addDocument(doc);
+    }
+
+    /**
+     * Adapted from XMLSerializer. Wraps the given byte range in a pooled TaggedValuePointable and hands it
+     * to the traversal that creates the index elements for the Lucene index.
+     *
+     * @param b
+     *            backing byte array
+     * @param s
+     *            start offset of the value in {@code b}
+     * @param l
+     *            length of the value in bytes
+     * @param deweyId
+     *            dotted position id assigned to this value
+     * @param epath
+     *            path of the enclosing item
+     * @throws IOException
+     *             if the traversal fails
+     */
+    public void print(byte[] b, int s, int l, String deweyId, String epath) throws IOException {
+        final TaggedValuePointable tagged = pp.takeOne(TaggedValuePointable.class);
+        try {
+            tagged.set(b, s, l);
+            printTaggedValuePointable(tagged, deweyId, epath);
+        } finally {
+            //always return the pointable to the pool, even when the traversal throws
+            pp.giveBack(tagged);
+        }
+    }
+
+    /**
+     * Dispatches on the value's tag and indexes it.
+     * <p>
+     * Atomic values, text nodes and comments are converted by the matching print method into a two-element
+     * array whose second entry is passed on to the index element; their index item is built at the end of
+     * this method. Attributes, elements and document nodes build their own item (inside their print method,
+     * or here before the content is walked); sequences and node trees only recurse.
+     * </p>
+     *
+     * @param tvp
+     *            the value to index
+     * @param deweyId
+     *            dotted position id assigned to this value
+     * @param epath
+     *            path of the enclosing item
+     * @throws IOException
+     *             if a conversion or a nested traversal fails
+     * @throws UnsupportedOperationException
+     *             if the tag is not handled by any case
+     */
+    private void printTaggedValuePointable(TaggedValuePointable tvp, String deweyId, String epath) throws IOException {
+        byte tag = tvp.getTag();
+        // Item type used for atomic values; node cases overwrite it below.
+        String type = "text";
+        String[] result = { "", "" };
+        switch ((int) tag) {
+            case ValueTag.XS_ANY_URI_TAG:
+                result = printString(tvp, epath);
+                break;
+
+            case ValueTag.XS_BASE64_BINARY_TAG:
+                result = printBase64Binary(tvp, epath);
+                break;
+
+            case ValueTag.XS_BOOLEAN_TAG:
+                result = printBoolean(tvp, epath);
+                break;
+
+            case ValueTag.XS_DATE_TAG:
+                result = printDate(tvp, epath);
+                break;
+
+            case ValueTag.XS_DATETIME_TAG:
+                result = printDateTime(tvp, epath);
+                break;
+
+            case ValueTag.XS_DAY_TIME_DURATION_TAG:
+                result = printDTDuration(tvp, epath);
+                break;
+
+            case ValueTag.XS_BYTE_TAG:
+                result = printByte(tvp, epath);
+                break;
+
+            case ValueTag.XS_DECIMAL_TAG:
+                result = printDecimal(tvp, epath);
+                break;
+
+            case ValueTag.XS_DOUBLE_TAG:
+                result = printDouble(tvp, epath);
+                break;
+
+            case ValueTag.XS_DURATION_TAG:
+                result = printDuration(tvp, epath);
+                break;
+
+            case ValueTag.XS_FLOAT_TAG:
+                result = printFloat(tvp, epath);
+                break;
+
+            case ValueTag.XS_G_DAY_TAG:
+                result = printGDay(tvp, epath);
+                break;
+
+            case ValueTag.XS_G_MONTH_TAG:
+                result = printGMonth(tvp, epath);
+                break;
+
+            case ValueTag.XS_G_MONTH_DAY_TAG:
+                result = printGMonthDay(tvp, epath);
+                break;
+
+            case ValueTag.XS_G_YEAR_TAG:
+                result = printGYear(tvp, epath);
+                break;
+
+            case ValueTag.XS_G_YEAR_MONTH_TAG:
+                result = printGYearMonth(tvp, epath);
+                break;
+
+            case ValueTag.XS_HEX_BINARY_TAG:
+                result = printHexBinary(tvp, epath);
+                break;
+
+            case ValueTag.XS_INT_TAG:
+            case ValueTag.XS_UNSIGNED_SHORT_TAG:
+                result = printInt(tvp, epath);
+                break;
+
+            case ValueTag.XS_INTEGER_TAG:
+            case ValueTag.XS_LONG_TAG:
+            case ValueTag.XS_NEGATIVE_INTEGER_TAG:
+            case ValueTag.XS_NON_POSITIVE_INTEGER_TAG:
+            case ValueTag.XS_NON_NEGATIVE_INTEGER_TAG:
+            case ValueTag.XS_POSITIVE_INTEGER_TAG:
+            case ValueTag.XS_UNSIGNED_INT_TAG:
+            case ValueTag.XS_UNSIGNED_LONG_TAG:
+                result = printInteger(tvp, epath);
+                break;
+
+            case ValueTag.XS_NOTATION_TAG:
+                result = printString(tvp, epath);
+                break;
+
+            case ValueTag.XS_QNAME_TAG:
+                result = printQName(tvp, epath);
+                break;
+
+            case ValueTag.XS_SHORT_TAG:
+            case ValueTag.XS_UNSIGNED_BYTE_TAG:
+                result = printShort(tvp, epath);
+                break;
+
+            case ValueTag.XS_STRING_TAG:
+            case ValueTag.XS_NORMALIZED_STRING_TAG:
+            case ValueTag.XS_TOKEN_TAG:
+            case ValueTag.XS_LANGUAGE_TAG:
+            case ValueTag.XS_NMTOKEN_TAG:
+            case ValueTag.XS_NAME_TAG:
+            case ValueTag.XS_NCNAME_TAG:
+            case ValueTag.XS_ID_TAG:
+            case ValueTag.XS_IDREF_TAG:
+            case ValueTag.XS_ENTITY_TAG:
+                result = printString(tvp, epath);
+                break;
+
+            case ValueTag.XS_TIME_TAG:
+                result = printTime(tvp, epath);
+                break;
+
+            case ValueTag.XS_UNTYPED_ATOMIC_TAG:
+                result = printString(tvp, epath);
+                break;
+
+            case ValueTag.XS_YEAR_MONTH_DURATION_TAG:
+                result = printYMDuration(tvp, epath);
+                break;
+
+            // The attribute's index item is built inside printAttributeNode.
+            case ValueTag.ATTRIBUTE_NODE_TAG:
+                type = "attribute";
+                printAttributeNode(tvp, deweyId, epath);
+                break;
+
+            case ValueTag.TEXT_NODE_TAG:
+                type = "textnode";
+                result = printTextNode(tvp, epath);
+                break;
+
+            case ValueTag.COMMENT_NODE_TAG:
+                type = "comment";
+                result = printCommentNode(tvp, epath);
+                break;
+
+            // Sequences and node trees produce no item of their own; only their content is indexed.
+            case ValueTag.SEQUENCE_TAG:
+                type = "sequence";
+                printSequence(tvp, deweyId, epath);
+                break;
+
+            case ValueTag.NODE_TREE_TAG:
+                type = "tree";
+                printNodeTree(tvp, deweyId, epath);
+                break;
+
+            // The element's index item is built inside printElementNode.
+            case ValueTag.ELEMENT_NODE_TAG:
+                type = "element";
+                printElementNode(tvp, deweyId, epath);
+                break;
+
+            // The document item is recorded first, before its content is traversed.
+            case ValueTag.DOCUMENT_NODE_TAG:
+                type = "doc";
+                buildIndexItem(deweyId, type, result, epath);
+                printDocumentNode(tvp, deweyId, epath);
+                break;
+
+            default:
+                throw new UnsupportedOperationException("Encountered tag: " + tvp.getTag());
+        }
+        // Everything except the container/node kinds handled above gets its index item here,
+        // after its value has been converted.
+        if ((int) tag != ValueTag.DOCUMENT_NODE_TAG && (int) tag != ValueTag.SEQUENCE_TAG
+                && (int) tag != ValueTag.NODE_TREE_TAG && (int) tag != ValueTag.ELEMENT_NODE_TAG
+                && (int) tag != ValueTag.ATTRIBUTE_NODE_TAG) {
+            buildIndexItem(deweyId, type, result, epath);
+        }
+
+    }
+
+    /**
+     * Builds the Lucene field for one item and appends it, together with the item's id, to the results.
+     * <p>
+     * The stored string is the parent path, the item's own path with the parent path removed, a dot and the
+     * item's type. The parser does not accept '/', so every '/' becomes '.'; for non-document items the
+     * first '/' of the item's own part becomes ':' instead. Document items carry no parent path.
+     * </p>
+     *
+     * @param deweyId
+     *            dotted position id of the item
+     * @param type
+     *            item type, e.g. "doc", "element" or "attribute"
+     * @param result
+     *            two-element array; the second entry is passed to the index element
+     * @param parentPath
+     *            path of the enclosing item
+     */
+    private void buildIndexItem(String deweyId, String type, String[] result, String parentPath) {
+        IndexElement element = new IndexElement(deweyId, type, result[1]);
+
+        //strip the parent's part from the element path, leaving only what this item contributes
+        String ownPath = StringUtils.replace(element.epath(), parentPath, "");
+        String parentPrefix = parentPath.replace("/", ".");
+
+        if (type.equals("doc")) {
+            parentPrefix = "";
+        } else {
+            ownPath = ownPath.replaceFirst("/", ":");
+        }
+
+        String fullItem = parentPrefix + ownPath.replace("/", ".") + "." + element.type();
+
+        //Add this element to the array (they will be added in reverse order).
+        StringField itemField = new StringField("item", fullItem, Field.Store.YES);
+        results.add(new ComplexItem(itemField, element.id()));
+    }
+
+    /**
+     * Converts a decimal value to its string form in the scratch buffer and returns what
+     * {@code printStringAbvs} produces for it.
+     *
+     * @param tvp
+     *            tagged value holding the decimal
+     * @param path
+     *            path of the enclosing item
+     * @return the array returned by {@code printStringAbvs(path)}
+     * @throws IOException
+     *             wrapping any exception raised during the conversion
+     */
+    private String[] printDecimal(TaggedValuePointable tvp, String path) throws IOException {
+        final XSDecimalPointable decimal = pp.takeOne(XSDecimalPointable.class);
+        try {
+            tvp.getValue(decimal);
+            abvs.reset();
+            castToString.convertDecimal(decimal, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(decimal);
+        }
+    }
+
+    /**
+     * Indexes a node tree: makes it the current tree, indexes its root node, then clears the current tree.
+     *
+     * @param tvp
+     *            tagged value holding the node tree
+     * @param deweyId
+     *            dotted position id passed on to the root node
+     * @param path
+     *            path of the enclosing item
+     * @throws IOException
+     *             if indexing the root node fails
+     * @throws IllegalStateException
+     *             if another node tree is already being traversed
+     */
+    private void printNodeTree(TaggedValuePointable tvp, String deweyId, String path) throws IOException {
+        if (ntp != null) {
+            throw new IllegalStateException("Nested NodeTreePointable found");
+        }
+        ntp = pp.takeOne(NodeTreePointable.class);
+        final TaggedValuePointable root = pp.takeOne(TaggedValuePointable.class);
+        try {
+            tvp.getValue(ntp);
+            ntp.getRootNode(root);
+            printTaggedValuePointable(root, deweyId, path);
+        } finally {
+            pp.giveBack(root);
+            pp.giveBack(ntp);
+            //reset so that the next tree is not rejected as nested
+            ntp = null;
+        }
+    }
+
+    /**
+     * Reads a comment node's text from the current node tree and converts it with {@code printString}.
+     *
+     * @param tvp
+     *            tagged value holding the comment node
+     * @param path
+     *            path of the enclosing item
+     * @return the array returned by {@code printString} for the comment text
+     */
+    private String[] printCommentNode(TaggedValuePointable tvp, String path) {
+        final TextOrCommentNodePointable comment = pp.takeOne(TextOrCommentNodePointable.class);
+        final UTF8StringPointable text = pp.takeOne(UTF8StringPointable.class);
+        try {
+            tvp.getValue(comment);
+            comment.getValue(ntp, text);
+            return printString(text, path);
+        } finally {
+            pp.giveBack(comment);
+            pp.giveBack(text);
+        }
+    }
+
+    /**
+     * Reads a text node's content from the current node tree and converts it with {@code printString}.
+     *
+     * @param tvp
+     *            tagged value holding the text node
+     * @param path
+     *            path of the enclosing item
+     * @return the array returned by {@code printString} for the node's text
+     */
+    private String[] printTextNode(TaggedValuePointable tvp, String path) {
+        final TextOrCommentNodePointable textNode = pp.takeOne(TextOrCommentNodePointable.class);
+        final UTF8StringPointable text = pp.takeOne(UTF8StringPointable.class);
+        try {
+            tvp.getValue(textNode);
+            textNode.getValue(ntp, text);
+            return printString(text, path);
+        } finally {
+            pp.giveBack(textNode);
+            pp.giveBack(text);
+        }
+    }
+
+    /**
+     * Indexes an attribute: builds an "attribute" item from its name, then indexes its value as child 0
+     * of the attribute, using the path produced for the name.
+     *
+     * @param tvp
+     *            tagged value holding the attribute node
+     * @param deweyId
+     *            dotted position id of the attribute
+     * @param path
+     *            path of the enclosing item
+     * @throws IOException
+     *             if indexing the attribute value fails
+     */
+    private void printAttributeNode(TaggedValuePointable tvp, String deweyId, String path) throws IOException {
+        final AttributeNodePointable attribute = pp.takeOne(AttributeNodePointable.class);
+        final CodedQNamePointable codedName = pp.takeOne(CodedQNamePointable.class);
+        final UTF8StringPointable nameText = pp.takeOne(UTF8StringPointable.class);
+        final TaggedValuePointable value = pp.takeOne(TaggedValuePointable.class);
+        try {
+            tvp.getValue(attribute);
+            attribute.getName(codedName);
+            final String[] named = printPrefixedQName(codedName, nameText, path);
+            buildIndexItem(deweyId, "attribute", named, path);
+
+            attribute.getValue(ntp, value);
+            printTaggedValuePointable(value, deweyId + ".0", named[1]);
+        } finally {
+            pp.giveBack(value);
+            pp.giveBack(nameText);
+            pp.giveBack(attribute);
+            pp.giveBack(codedName);
+        }
+    }
+
+    /**
+     * Indexes an element: builds an "element" item from its name, then indexes its attributes followed by
+     * its children. Attributes take child positions starting at 0; children continue after the last
+     * attribute position.
+     *
+     * @param tvp
+     *            tagged value holding the element node
+     * @param deweyId
+     *            dotted position id of the element
+     * @param path
+     *            path of the enclosing item
+     * @throws IOException
+     *             if indexing an attribute or child fails
+     */
+    private void printElementNode(TaggedValuePointable tvp, String deweyId, String path) throws IOException {
+        final ElementNodePointable element = pp.takeOne(ElementNodePointable.class);
+        final CodedQNamePointable codedName = pp.takeOne(CodedQNamePointable.class);
+        final UTF8StringPointable nameText = pp.takeOne(UTF8StringPointable.class);
+        final SequencePointable seq = pp.takeOne(SequencePointable.class);
+        try {
+            tvp.getValue(element);
+            element.getName(codedName);
+            final String[] named = printPrefixedQName(codedName, nameText, path);
+            buildIndexItem(deweyId, "element", named, path);
+            final String elementPath = named[1];
+
+            //attributes first, numbered from 0
+            element.getAttributeSequence(ntp, seq);
+            int attributeCount = 0;
+            if (seq.getByteArray() != null && seq.getEntryCount() > 0) {
+                printSequence(seq, deweyId, 0, elementPath);
+                attributeCount = seq.getEntryCount();
+            }
+
+            //children are numbered after the attributes
+            element.getChildrenSequence(ntp, seq);
+            if (seq.getByteArray() != null) {
+                printSequence(seq, deweyId, attributeCount, elementPath);
+            }
+        } finally {
+            pp.giveBack(seq);
+            pp.giveBack(nameText);
+            pp.giveBack(codedName);
+            pp.giveBack(element);
+        }
+    }
+
+    /**
+     * Resolves the local-name code of a coded QName against the current node tree and converts the
+     * resulting string with {@code printString}. Only the local part of the name is used.
+     *
+     * @param cqp
+     *            coded name to resolve
+     * @param utf8sp
+     *            pointable that receives the resolved local name
+     * @param path
+     *            path of the enclosing item
+     * @return the array returned by {@code printString} for the local name
+     */
+    private String[] printPrefixedQName(CodedQNamePointable cqp, UTF8StringPointable utf8sp, String path) {
+        ntp.getString(cqp.getLocalCode(), utf8sp);
+        final String[] named = printString(utf8sp, path);
+        return named;
+    }
+
+    /**
+     * Indexes the content sequence of a document node; entries are numbered from 0 below the document's id.
+     *
+     * @param tvp
+     *            tagged value holding the document node
+     * @param deweyId
+     *            dotted position id of the document
+     * @param path
+     *            path passed on to the content entries
+     * @throws IOException
+     *             if indexing an entry fails
+     */
+    private void printDocumentNode(TaggedValuePointable tvp, String deweyId, String path) throws IOException {
+        final DocumentNodePointable document = pp.takeOne(DocumentNodePointable.class);
+        final SequencePointable content = pp.takeOne(SequencePointable.class);
+        try {
+            tvp.getValue(document);
+            document.getContent(ntp, content);
+            printSequence(content, deweyId, 0, path);
+        } finally {
+            pp.giveBack(content);
+            pp.giveBack(document);
+        }
+    }
+
+    /**
+     * Unwraps a tagged sequence value and visits its entries, numbering them
+     * from 0 beneath {@code deweyId}.
+     *
+     * @param tvp tagged value holding the sequence
+     * @param deweyId identifier under which the entries are numbered
+     * @param path path forwarded unchanged to every entry
+     * @throws IOException propagated from the entry visits
+     */
+    private void printSequence(TaggedValuePointable tvp, String deweyId, String path) throws IOException {
+        final SequencePointable sequence = pp.takeOne(SequencePointable.class);
+        try {
+            tvp.getValue(sequence);
+            printSequence(sequence, deweyId, 0, path);
+        } finally {
+            pp.giveBack(sequence);
+        }
+    }
+
+    /**
+     * Visits every entry of a sequence by passing its bytes to {@code print}.
+     * <p>
+     * Entry {@code i} is given the identifier
+     * {@code deweyId + "." + (i + addon)}.
+     *
+     * @param seqp sequence whose entries are visited in order
+     * @param deweyId identifier of the owner of the sequence
+     * @param addon offset added to each entry index when forming its identifier
+     * @param path path forwarded unchanged to {@code print}
+     * @throws IOException propagated from {@code print}
+     */
+    private void printSequence(SequencePointable seqp, String deweyId, int addon, String path) throws IOException {
+        final VoidPointable entry = pp.takeOne(VoidPointable.class);
+        try {
+            final int count = seqp.getEntryCount();
+            int position = addon;
+            for (int i = 0; i < count; i++, position++) {
+                final String childId = deweyId + "." + position;
+                seqp.getEntry(i, entry);
+                print(entry.getByteArray(), entry.getStartOffset(), entry.getLength(), childId, path);
+            }
+        } finally {
+            pp.giveBack(entry);
+        }
+    }
+
+    /**
+     * Converts a binary value with {@code castToString.convertBase64Binary}
+     * and returns what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSBinaryPointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printBase64Binary(TaggedValuePointable tvp, String path) throws IOException {
+        final XSBinaryPointable binary = pp.takeOne(XSBinaryPointable.class);
+        try {
+            tvp.getValue(binary);
+            abvs.reset();
+            castToString.convertBase64Binary(binary, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(binary);
+        }
+    }
+
+    /**
+     * Converts a boolean value to {@code "true"} or {@code "false"}.
+     *
+     * @param tvp tagged value read into a pooled {@code BooleanPointable}
+     * @param path path prefix
+     * @return a two-element array: the text, and {@code path + "/" + text}
+     */
+    private String[] printBoolean(TaggedValuePointable tvp, String path) {
+        final BooleanPointable flag = pp.takeOne(BooleanPointable.class);
+        try {
+            tvp.getValue(flag);
+            final String text = Boolean.toString(flag.getBoolean());
+            return new String[] { text, path + "/" + text };
+        } finally {
+            pp.giveBack(flag);
+        }
+    }
+
+    /**
+     * Converts a byte value to its decimal text.
+     *
+     * @param tvp tagged value read into a pooled {@code BytePointable}
+     * @param path path prefix
+     * @return a two-element array: the text, and {@code path + "/" + text}
+     */
+    private String[] printByte(TaggedValuePointable tvp, String path) {
+        final BytePointable byteValue = pp.takeOne(BytePointable.class);
+        try {
+            tvp.getValue(byteValue);
+            final String text = Byte.toString(byteValue.byteValue());
+            return new String[] { text, path + "/" + text };
+        } finally {
+            pp.giveBack(byteValue);
+        }
+    }
+
+    /**
+     * Converts a double value with {@code castToString.convertDouble} and
+     * returns what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code DoublePointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printDouble(TaggedValuePointable tvp, String path) throws IOException {
+        final DoublePointable doubleValue = pp.takeOne(DoublePointable.class);
+        try {
+            tvp.getValue(doubleValue);
+            abvs.reset();
+            castToString.convertDouble(doubleValue, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(doubleValue);
+        }
+    }
+
+    /**
+     * Converts a date value with {@code castToString.convertDate} and returns
+     * what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSDatePointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printDate(TaggedValuePointable tvp, String path) throws IOException {
+        final XSDatePointable date = pp.takeOne(XSDatePointable.class);
+        try {
+            tvp.getValue(date);
+            abvs.reset();
+            castToString.convertDate(date, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(date);
+        }
+    }
+
+    /**
+     * Converts a date-time value with {@code castToString.convertDatetime}
+     * and returns what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSDateTimePointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printDateTime(TaggedValuePointable tvp, String path) throws IOException {
+        final XSDateTimePointable dateTime = pp.takeOne(XSDateTimePointable.class);
+        try {
+            tvp.getValue(dateTime);
+            abvs.reset();
+            castToString.convertDatetime(dateTime, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(dateTime);
+        }
+    }
+
+    /**
+     * Converts a day-time duration, stored as a long, with
+     * {@code castToString.convertDTDuration} and returns what
+     * {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code LongPointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printDTDuration(TaggedValuePointable tvp, String path) throws IOException {
+        final LongPointable durationValue = pp.takeOne(LongPointable.class);
+        try {
+            tvp.getValue(durationValue);
+            abvs.reset();
+            castToString.convertDTDuration(durationValue, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(durationValue);
+        }
+    }
+
+    /**
+     * Converts a duration value with {@code castToString.convertDuration} and
+     * returns what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSDurationPointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printDuration(TaggedValuePointable tvp, String path) throws IOException {
+        final XSDurationPointable duration = pp.takeOne(XSDurationPointable.class);
+        try {
+            tvp.getValue(duration);
+            abvs.reset();
+            castToString.convertDuration(duration, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(duration);
+        }
+    }
+
+    /**
+     * Converts a float value with {@code castToString.convertFloat} and
+     * returns what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code FloatPointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printFloat(TaggedValuePointable tvp, String path) throws IOException {
+        final FloatPointable floatValue = pp.takeOne(FloatPointable.class);
+        try {
+            tvp.getValue(floatValue);
+            abvs.reset();
+            castToString.convertFloat(floatValue, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(floatValue);
+        }
+    }
+
+    /**
+     * Converts a gDay value with {@code castToString.convertGDay} and returns
+     * what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSDatePointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printGDay(TaggedValuePointable tvp, String path) throws IOException {
+        final XSDatePointable date = pp.takeOne(XSDatePointable.class);
+        try {
+            tvp.getValue(date);
+            abvs.reset();
+            castToString.convertGDay(date, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(date);
+        }
+    }
+
+    /**
+     * Converts a gMonth value with {@code castToString.convertGMonth} and
+     * returns what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSDatePointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printGMonth(TaggedValuePointable tvp, String path) throws IOException {
+        final XSDatePointable date = pp.takeOne(XSDatePointable.class);
+        try {
+            tvp.getValue(date);
+            abvs.reset();
+            castToString.convertGMonth(date, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(date);
+        }
+    }
+
+    /**
+     * Converts a gMonthDay value with {@code castToString.convertGMonthDay}
+     * and returns what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSDatePointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printGMonthDay(TaggedValuePointable tvp, String path) throws IOException {
+        final XSDatePointable date = pp.takeOne(XSDatePointable.class);
+        try {
+            tvp.getValue(date);
+            abvs.reset();
+            castToString.convertGMonthDay(date, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(date);
+        }
+    }
+
+    /**
+     * Converts a gYear value with {@code castToString.convertGYear} and
+     * returns what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSDatePointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printGYear(TaggedValuePointable tvp, String path) throws IOException {
+        final XSDatePointable date = pp.takeOne(XSDatePointable.class);
+        try {
+            tvp.getValue(date);
+            abvs.reset();
+            castToString.convertGYear(date, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(date);
+        }
+    }
+
+    /**
+     * Converts a gYearMonth value with {@code castToString.convertGYearMonth}
+     * and returns what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSDatePointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printGYearMonth(TaggedValuePointable tvp, String path) throws IOException {
+        final XSDatePointable date = pp.takeOne(XSDatePointable.class);
+        try {
+            tvp.getValue(date);
+            abvs.reset();
+            castToString.convertGYearMonth(date, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(date);
+        }
+    }
+
+    /**
+     * Converts a binary value with {@code castToString.convertHexBinary} and
+     * returns what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSBinaryPointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printHexBinary(TaggedValuePointable tvp, String path) throws IOException {
+        final XSBinaryPointable binary = pp.takeOne(XSBinaryPointable.class);
+        try {
+            tvp.getValue(binary);
+            abvs.reset();
+            castToString.convertHexBinary(binary, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(binary);
+        }
+    }
+
+    /**
+     * Converts an int value to its decimal text.
+     *
+     * @param tvp tagged value read into a pooled {@code IntegerPointable}
+     * @param path path prefix
+     * @return a two-element array: the text, and {@code path + "/" + text}
+     */
+    private String[] printInt(TaggedValuePointable tvp, String path) {
+        final IntegerPointable intValue = pp.takeOne(IntegerPointable.class);
+        try {
+            tvp.getValue(intValue);
+            final String text = Integer.toString(intValue.intValue());
+            return new String[] { text, path + "/" + text };
+        } finally {
+            pp.giveBack(intValue);
+        }
+    }
+
+    /**
+     * Converts an integer value, stored as a long, to its decimal text.
+     *
+     * @param tvp tagged value read into a pooled {@code LongPointable}
+     * @param path path prefix
+     * @return a two-element array: the text, and {@code path + "/" + text}
+     */
+    private String[] printInteger(TaggedValuePointable tvp, String path) {
+        final LongPointable longValue = pp.takeOne(LongPointable.class);
+        try {
+            tvp.getValue(longValue);
+            final String text = Long.toString(longValue.longValue());
+            return new String[] { text, path + "/" + text };
+        } finally {
+            pp.giveBack(longValue);
+        }
+    }
+
+    /**
+     * Converts a short value to its decimal text.
+     *
+     * @param tvp tagged value read into a pooled {@code ShortPointable}
+     * @param path path prefix
+     * @return a two-element array: the text, and {@code path + "/" + text}
+     */
+    private String[] printShort(TaggedValuePointable tvp, String path) {
+        final ShortPointable shortValue = pp.takeOne(ShortPointable.class);
+        try {
+            tvp.getValue(shortValue);
+            final String text = Short.toString(shortValue.shortValue());
+            return new String[] { text, path + "/" + text };
+        } finally {
+            pp.giveBack(shortValue);
+        }
+    }
+
+    /**
+     * Converts a QName value with {@code castToString.convertQName} and
+     * returns what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSQNamePointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printQName(TaggedValuePointable tvp, String path) throws IOException {
+        final XSQNamePointable qname = pp.takeOne(XSQNamePointable.class);
+        try {
+            tvp.getValue(qname);
+            abvs.reset();
+            castToString.convertQName(qname, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(qname);
+        }
+    }
+
+    /**
+     * Converts the current contents of {@code abvs} with {@code printString}.
+     * <p>
+     * The first byte of the buffer is skipped: the string pointable is set to
+     * start one byte in and to be one byte shorter than the buffer.
+     * NOTE(review): presumably that byte is a type tag written by the
+     * converter; confirm against {@code castToString}.
+     *
+     * @param path path prefix forwarded to {@code printString}
+     * @return the result of {@code printString} for the buffered string
+     */
+    private String[] printStringAbvs(String path) {
+        final UTF8StringPointable buffered = pp.takeOne(UTF8StringPointable.class);
+        try {
+            final int start = abvs.getStartOffset() + 1;
+            final int length = abvs.getLength() - 1;
+            buffered.set(abvs.getByteArray(), start, length);
+            return printString(buffered, path);
+        } finally {
+            pp.giveBack(buffered);
+        }
+    }
+
+    /**
+     * Unwraps a tagged string value into a pooled {@code UTF8StringPointable}
+     * and converts it with the pointable-based {@code printString} overload.
+     *
+     * @param tvp tagged value holding the string
+     * @param path path prefix forwarded to the overload
+     * @return the result of the pointable-based overload
+     */
+    private String[] printString(TaggedValuePointable tvp, String path) {
+        final UTF8StringPointable text = pp.takeOne(UTF8StringPointable.class);
+        try {
+            tvp.getValue(text);
+            return printString(text, path);
+        } finally {
+            pp.giveBack(text);
+        }
+    }
+
+    /**
+     * Decodes a UTF-8 string pointable into a Java string, replacing the five
+     * XML special characters ({@code < > & " '}) with their entity
+     * references, and appends the result to {@code path}.
+     * <p>
+     * The text is accumulated in a {@link StringBuilder}; appending to a
+     * {@code String} once per character would copy the whole prefix on every
+     * iteration.
+     *
+     * @param utf8sp string to decode; decoding starts at offset 2
+     * @param path path prefix
+     * @return a two-element array: the escaped text, and {@code path + "/" + text}
+     */
+    private String[] printString(UTF8StringPointable utf8sp, String path) {
+        int remaining = utf8sp.getUTFLength();
+        // NOTE(review): starting at 2 presumably skips a two-byte length prefix; confirm against UTF8StringPointable.
+        int offset = 2;
+        final StringBuilder escaped = new StringBuilder();
+        while (remaining > 0) {
+            final char c = utf8sp.charAt(offset);
+            switch (c) {
+                case '<':
+                    escaped.append("&lt;");
+                    break;
+
+                case '>':
+                    escaped.append("&gt;");
+                    break;
+
+                case '&':
+                    escaped.append("&amp;");
+                    break;
+
+                case '"':
+                    escaped.append("&quot;");
+                    break;
+
+                case '\'':
+                    escaped.append("&apos;");
+                    break;
+
+                default:
+                    escaped.append(c);
+                    break;
+            }
+            // Advance by the encoded width of the character just read.
+            final int cLen = UTF8StringPointable.getModifiedUTF8Len(c);
+            offset += cLen;
+            remaining -= cLen;
+        }
+        final String text = escaped.toString();
+        return new String[] { text, path + "/" + text };
+    }
+
+    /**
+     * Converts a time value with {@code castToString.convertTime} and returns
+     * what {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code XSTimePointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printTime(TaggedValuePointable tvp, String path) throws IOException {
+        final XSTimePointable time = pp.takeOne(XSTimePointable.class);
+        try {
+            tvp.getValue(time);
+            abvs.reset();
+            castToString.convertTime(time, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(time);
+        }
+    }
+
+    /**
+     * Converts a year-month duration, stored as an int, with
+     * {@code castToString.convertYMDuration} and returns what
+     * {@code printStringAbvs} builds from the converted text.
+     *
+     * @param tvp tagged value read into a pooled {@code IntegerPointable}
+     * @param path path prefix forwarded to {@code printStringAbvs}
+     * @return the result of {@code printStringAbvs(path)}
+     * @throws IOException wrapping any exception raised while converting
+     */
+    private String[] printYMDuration(TaggedValuePointable tvp, String path) throws IOException {
+        final IntegerPointable durationValue = pp.takeOne(IntegerPointable.class);
+        try {
+            tvp.getValue(durationValue);
+            abvs.reset();
+            castToString.convertYMDuration(durationValue, dOut);
+            return printStringAbvs(path);
+        } catch (Exception e) {
+            throw new IOException(e);
+        } finally {
+            pp.giveBack(durationValue);
+        }
+    }
+
+}
\ No newline at end of file

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/index/IndexElement.java b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexElement.java
new file mode 100644
index 0000000..d2487a5
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexElement.java

@@ -0,0 +1,42 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.index;
+
+/**
+ * Immutable holder for the three strings that describe one index entry: an
+ * identifier, a type and an element path.
+ * <p>
+ * The values are stored exactly as passed to the constructor; no validation
+ * or copying is performed, so any of them may be {@code null}.
+ */
+public class IndexElement {
+    // Never reassigned after construction, hence final.
+    private final String id;
+    private final String type;
+    private final String elementpath;
+
+    /**
+     * Creates an entry from its three components.
+     *
+     * @param id identifier of the entry
+     * @param type type of the entry
+     * @param elementpath element path of the entry
+     */
+    public IndexElement(String id, String type, String elementpath) {
+        this.id = id;
+        this.type = type;
+        this.elementpath = elementpath;
+    }
+
+    /**
+     * @return the identifier given at construction
+     */
+    public String id() {
+        return id;
+    }
+
+    /**
+     * @return the type given at construction
+     */
+    public String type() {
+        return type;
+    }
+
+    /**
+     * @return the element path given at construction
+     */
+    public String epath() {
+        return elementpath;
+    }
+
+}
\ No newline at end of file

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
index b8dca63..ef51cee 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java

@@ -18,8 +18,6 @@
 
 import java.io.ByteArrayInputStream;
 import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
 import java.io.InputStream;
 import java.net.InetAddress;
 import java.nio.ByteBuffer;
@@ -31,8 +29,6 @@
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import javax.xml.parsers.ParserConfigurationException;
-
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.filefilter.TrueFileFilter;
 import org.apache.commons.lang.StringUtils;
@@ -67,7 +63,6 @@
 import org.apache.vxquery.xmlparser.ITreeNodeIdProvider;
 import org.apache.vxquery.xmlparser.TreeNodeIdProvider;
 import org.apache.vxquery.xmlparser.XMLParser;
-import org.xml.sax.SAXException;
 
 public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
     private static final long serialVersionUID = 1L;
@@ -179,50 +174,34 @@
                                 for (int i = 0; i < size; i++) {
                                     //read split
                                     context = ctxFactory.createContext(job.getConfiguration(), i);
-                                    try {
-                                        reader = inputFormat.createRecordReader(inputSplits.get(i), context);
-                                        reader.initialize(inputSplits.get(i), context);
-                                        while (reader.nextKeyValue()) {
-                                            value = reader.getCurrentValue().toString();
-                                            //Split value if it contains more than one item with the tag
-                                            if (StringUtils.countMatches(value, tag) > 1) {
-                                                String items[] = value.split(tag);
-                                                for (String item : items) {
-                                                    if (item.length() > 0) {
-                                                        item = START_TAG + tag + item;
-                                                        stream = new ByteArrayInputStream(
-                                                                item.getBytes(StandardCharsets.UTF_8));
-                                                        parser.parseHDFSElements(stream, writer, fta, i);
-                                                    }
-                                                }
-                                            } else {
-                                                value = START_TAG + value;
-                                                //create an input stream to the file currently reading and send it to parser
-                                                stream = new ByteArrayInputStream(
-                                                        value.getBytes(StandardCharsets.UTF_8));
-                                                parser.parseHDFSElements(stream, writer, fta, i);
-                                            }
-                                        }
 
-                                    } catch (InterruptedException e) {
-                                        if (LOGGER.isLoggable(Level.SEVERE)) {
-                                            LOGGER.severe(e.getMessage());
+                                    reader = inputFormat.createRecordReader(inputSplits.get(i), context);
+                                    reader.initialize(inputSplits.get(i), context);
+                                    while (reader.nextKeyValue()) {
+                                        value = reader.getCurrentValue().toString();
+                                        //Split value if it contains more than one item with the tag
+                                        if (StringUtils.countMatches(value, tag) > 1) {
+                                            String items[] = value.split(tag);
+                                            for (String item : items) {
+                                                if (item.length() > 0) {
+                                                    item = START_TAG + tag + item;
+                                                    stream = new ByteArrayInputStream(
+                                                            item.getBytes(StandardCharsets.UTF_8));
+                                                    parser.parseHDFSElements(stream, writer, fta, i);
+                                                }
+                                            }
+                                        } else {
+                                            value = START_TAG + value;
+                                            //create an input stream to the file currently reading and send it to parser
+                                            stream = new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8));
+                                            parser.parseHDFSElements(stream, writer, fta, i);
                                         }
                                     }
+
                                 }
 
-                            } catch (IOException e) {
-                                if (LOGGER.isLoggable(Level.SEVERE)) {
-                                    LOGGER.severe(e.getMessage());
-                                }
-                            } catch (ParserConfigurationException e) {
-                                if (LOGGER.isLoggable(Level.SEVERE)) {
-                                    LOGGER.severe(e.getMessage());
-                                }
-                            } catch (SAXException e) {
-                                if (LOGGER.isLoggable(Level.SEVERE)) {
-                                    LOGGER.severe(e.getMessage());
-                                }
+                            } catch (Exception e) {
+                                throw new HyracksDataException(e);
                             }
                         } else {
                             try {
@@ -248,22 +227,14 @@
                                     throw new HyracksDataException("Invalid HDFS directory parameter (" + nodeId + ":"
                                             + directory + ") passed to collection.");
                                 }
-                            } catch (FileNotFoundException e) {
-                                if (LOGGER.isLoggable(Level.SEVERE)) {
-                                    LOGGER.severe(e.getMessage());
-                                }
-                            } catch (IOException e) {
-                                if (LOGGER.isLoggable(Level.SEVERE)) {
-                                    LOGGER.severe(e.getMessage());
-                                }
+                            } catch (Exception e) {
+                                throw new HyracksDataException(e);
                             }
                         }
                         try {
                             fs.close();
-                        } catch (IOException e) {
-                            if (LOGGER.isLoggable(Level.SEVERE)) {
-                                LOGGER.severe(e.getMessage());
-                            }
+                        } catch (Exception e) {
+                            throw new HyracksDataException(e);
                         }
                     }
                 }

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveAnalyzer.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveAnalyzer.java
new file mode 100644
index 0000000..803aeee
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveAnalyzer.java

@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.vxquery.runtime.functions.index;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.standard.ClassicAnalyzer;
+import org.apache.lucene.analysis.standard.ClassicTokenizer;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.Version;
+
+/**
+ * Analyzer that tokenizes with {@link StandardTokenizer} (or
+ * {@link StandardTokenizer40} when {@code getVersion()} is older than
+ * {@link Version#LUCENE_4_7_0}) and then applies {@link StandardFilter} and
+ * {@link StopFilter}.
+ * <p>
+ * No lower-casing filter is part of the chain, so tokens keep the case they
+ * have in the input.
+ */
+public final class CaseSensitiveAnalyzer extends StopwordAnalyzerBase {
+
+    /** Default maximum allowed token length */
+    public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+    /**
+     * The stop-word set used by the no-argument constructor; it is
+     * {@link StopAnalyzer#ENGLISH_STOP_WORDS_SET}.
+     */
+    public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+    private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+    /**
+     * Builds an analyzer with the given stop words.
+     *
+     * @param stopWords
+     *            stop words
+     */
+    public CaseSensitiveAnalyzer(CharArraySet stopWords) {
+        super(stopWords);
+    }
+
+    /**
+     * Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}).
+     */
+    public CaseSensitiveAnalyzer() {
+        this(STOP_WORDS_SET);
+    }
+
+    /**
+     * Builds an analyzer with the stop words from the given reader.
+     *
+     * @see WordlistLoader#getWordSet(Reader)
+     * @param stopwords
+     *            Reader to read stop words from
+     * @throws IOException
+     *             if the stop words cannot be loaded from the reader
+     */
+    public CaseSensitiveAnalyzer(Reader stopwords) throws IOException {
+        this(loadStopwordSet(stopwords));
+    }
+
+    /**
+     * Sets the maximum allowed token length. The value is applied to the
+     * tokenizer when components are created and again each time a reader is
+     * set on them.
+     *
+     * @param length
+     *            new maximum token length
+     */
+    public void setMaxTokenLength(int length) {
+        maxTokenLength = length;
+    }
+
+    /**
+     * @return the current maximum token length
+     * @see #setMaxTokenLength
+     */
+    public int getMaxTokenLength() {
+        return maxTokenLength;
+    }
+
+    @Override
+    protected TokenStreamComponents createComponents(final String fieldName) {
+        final Tokenizer source = newTokenizer();
+        final TokenStream filtered = new StopFilter(new StandardFilter(source), stopwords);
+        return new TokenStreamComponents(source, filtered) {
+            @Override
+            protected void setReader(final Reader reader) {
+                // Pick up any change made through setMaxTokenLength since creation.
+                applyMaxTokenLength(source);
+                super.setReader(reader);
+            }
+        };
+    }
+
+    /**
+     * Creates the tokenizer matching the analyzer version and configures its
+     * maximum token length.
+     */
+    private Tokenizer newTokenizer() {
+        final Tokenizer tokenizer;
+        if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
+            tokenizer = new StandardTokenizer();
+        } else {
+            tokenizer = new StandardTokenizer40();
+        }
+        applyMaxTokenLength(tokenizer);
+        return tokenizer;
+    }
+
+    /**
+     * Copies the current {@code maxTokenLength} onto the given tokenizer,
+     * which is one of the two types created by {@link #newTokenizer()}.
+     */
+    private void applyMaxTokenLength(Tokenizer tokenizer) {
+        final int limit = maxTokenLength;
+        if (tokenizer instanceof StandardTokenizer) {
+            ((StandardTokenizer) tokenizer).setMaxTokenLength(limit);
+        } else {
+            ((StandardTokenizer40) tokenizer).setMaxTokenLength(limit);
+        }
+    }
+}
\ No newline at end of file

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveQueryParser.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveQueryParser.java
new file mode 100644
index 0000000..7cb0a18
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveQueryParser.java

@@ -0,0 +1,42 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.index;
+
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.FastCharStream;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.Query;
+
+/**
+ * {@link QueryParser} whose prefix queries are built from the term text
+ * exactly as it was written in the query.
+ * NOTE(review): presumably the inherited implementation may lower-case the
+ * term before building the query; confirm against the Lucene version in use.
+ */
+public class CaseSensitiveQueryParser extends QueryParser {
+
+    /**
+     * Creates a parser over an empty character stream and initializes it with
+     * the given default field and analyzer.
+     *
+     * @param f default field, passed to {@code init}
+     * @param a analyzer, passed to {@code init}
+     */
+    public CaseSensitiveQueryParser(String f, Analyzer a) {
+        super(new FastCharStream(new StringReader("")));
+        init(f, a);
+    }
+
+    /**
+     * Builds a prefix query on {@code field} for {@code termStr}, used verbatim.
+     *
+     * @throws ParseException if {@code termStr} starts with {@code '*'} while
+     *             leading wildcards are not allowed
+     */
+    @Override
+    protected Query getPrefixQuery(String field, String termStr) throws ParseException {
+        final boolean leadingWildcardForbidden = !getAllowLeadingWildcard();
+        if (leadingWildcardForbidden && termStr.startsWith("*")) {
+            throw new ParseException("'*' not allowed as first character in PrefixQuery");
+        }
+        return newPrefixQuery(new Term(field, termStr));
+    }
+}
\ No newline at end of file

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java
new file mode 100644
index 0000000..cf0b203
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java

@@ -0,0 +1,327 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.runtime.functions.index;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.algebricks.runtime.base.IUnnestingEvaluator;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.datamodel.values.ValueTag;
+import org.apache.vxquery.exceptions.ErrorCode;
+import org.apache.vxquery.exceptions.SystemException;
+import org.apache.vxquery.index.IndexAttributes;
+import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentUnnestingEvaluator;
+import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentUnnestingEvaluatorFactory;
+import org.apache.vxquery.xmlparser.ITreeNodeIdProvider;
+import org.apache.vxquery.xmlparser.SAXContentHandler;
+import org.apache.vxquery.xmlparser.TreeNodeIdProvider;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+public class CollectionFromIndexUnnestingEvaluatorFactory extends AbstractTaggedValueArgumentUnnestingEvaluatorFactory {
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Creates the factory; the argument evaluator factories are passed straight to the superclass.
+     *
+     * @param args
+     *            factories for the function's argument evaluators
+     */
+    public CollectionFromIndexUnnestingEvaluatorFactory(IScalarEvaluatorFactory[] args) {
+        super(args);
+    }
+
+    @Override
+    protected IUnnestingEvaluator createEvaluator(IHyracksTaskContext ctx, IScalarEvaluator[] args)
+            throws AlgebricksException {
+
+        return new AbstractTaggedValueArgumentUnnestingEvaluator(args) {
+
+            // Storage that parse() writes the current document into; step() points its result at it.
+            private ArrayBackedValueStorage nodeAbvs = new ArrayBackedValueStorage();
+
+            // Position in hits of the next document step() will return.
+            private int indexPlace;
+            // Number of entries in hits.
+            private int indexLength;
+            // Second argument; init() rewrites it into the field value that parse() looks for.
+            private String elementPath;
+            // First argument: the path handed to FSDirectory.open() in init().
+            private String indexName;
+
+            // Pointables that receive the raw string arguments before they are decoded.
+            private UTF8StringPointable stringIndexFolder = (UTF8StringPointable) UTF8StringPointable.FACTORY
+                    .createPointable();
+            private UTF8StringPointable stringElementPath = (UTF8StringPointable) UTF8StringPointable.FACTORY
+                    .createPointable();
+            // Stream pair used by init() to decode the string arguments with readUTF().
+            private ByteBufferInputStream bbis = new ByteBufferInputStream();
+            private DataInputStream di = new DataInputStream(bbis);
+
+            // Lucene objects created in init().
+            private IndexReader reader;
+            private IndexSearcher searcher;
+            private Analyzer analyzer;
+            private QueryParser parser;
+            // Search results of the query run in init().
+            private ScoreDoc[] hits;
+            // SAX handler that parse() and buildElement() feed to rebuild a document node.
+            private SAXContentHandler handler;
+            private Query query;
+            // Document fetched by the current step() call and its stored fields.
+            private Document doc;
+            private List<IndexableField> fields;
+
+            /**
+             * Hands out the next hit collected by init(). Each call loads one Lucene document,
+             * rebuilds it with parse() and points {@code result} at the rebuilt bytes.
+             *
+             * @return true when a document was produced, false once every hit has been returned
+             * @throws AlgebricksException
+             *             wrapping any IOException raised while loading or rebuilding the document
+             */
+            @Override
+            public boolean step(IPointable result) throws AlgebricksException {
+                // Nothing left to hand out.
+                if (indexPlace >= indexLength) {
+                    return false;
+                }
+
+                nodeAbvs.reset();
+                try {
+                    //TODO: now we get back the entire document
+                    doc = searcher.doc(hits[indexPlace].doc);
+                    fields = doc.getFields();
+                    parse(nodeAbvs);
+                } catch (IOException e) {
+                    throw new AlgebricksException(e);
+                }
+                indexPlace++;
+                result.set(nodeAbvs.getByteArray(), nodeAbvs.getStartOffset(), nodeAbvs.getLength());
+                return true;
+            }
+
+            /**
+             * Prepares a new scan: opens the Lucene index named by the first argument, turns the
+             * element path in the second argument into a prefix query on the "item" field and keeps
+             * the matching hits for step().
+             *
+             * @param args
+             *            args[0] is the index folder and args[1] the element path; both must be tagged
+             *            as xs:string
+             * @throws SystemException
+             *             FORG0006 when an argument is not a string; SYSE0001 when the element path
+             *             contains no '/', the index cannot be opened, or the query cannot be parsed
+             *             or executed
+             */
+            @Override
+            protected void init(TaggedValuePointable[] args) throws SystemException {
+
+                int partition = ctxview.getTaskAttemptId().getTaskId().getPartition();
+                ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider((short) partition);
+                handler = new SAXContentHandler(false, nodeIdProvider, true);
+
+                nodeAbvs.reset();
+                indexPlace = 0;
+                TaggedValuePointable tvp1 = args[0];
+                TaggedValuePointable tvp2 = args[1];
+
+                if (tvp1.getTag() != ValueTag.XS_STRING_TAG || tvp2.getTag() != ValueTag.XS_STRING_TAG) {
+                    throw new SystemException(ErrorCode.FORG0006);
+                }
+                tvp1.getValue(stringIndexFolder);
+                tvp2.getValue(stringElementPath);
+                // Decode the two string arguments: the index folder and the element path.
+                try {
+                    bbis.setByteBuffer(ByteBuffer.wrap(
+                            Arrays.copyOfRange(stringIndexFolder.getByteArray(), stringIndexFolder.getStartOffset(),
+                                    stringIndexFolder.getLength() + stringIndexFolder.getStartOffset())),
+                            0);
+                    indexName = di.readUTF();
+                    bbis.setByteBuffer(ByteBuffer.wrap(
+                            Arrays.copyOfRange(stringElementPath.getByteArray(), stringElementPath.getStartOffset(),
+                                    stringElementPath.getLength() + stringElementPath.getStartOffset())),
+                            0);
+                    elementPath = di.readUTF();
+                } catch (IOException e) {
+                    throw new SystemException(ErrorCode.SYSE0001, e);
+                }
+
+                // The path is split at its last '/' below; reject a path without one before any
+                // index resources are opened instead of failing with an unchecked exception.
+                int lastslash = elementPath.lastIndexOf("/");
+                if (lastslash < 0) {
+                    throw new SystemException(ErrorCode.SYSE0001,
+                            new IllegalArgumentException("The element path (" + elementPath + ") contains no '/'."));
+                }
+
+                try {
+                    // Release the reader left over from a previous init() before replacing it.
+                    if (reader != null) {
+                        reader.close();
+                        reader = null;
+                    }
+                    //Create the index reader.
+                    reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexName)));
+                } catch (IOException e) {
+                    throw new SystemException(ErrorCode.SYSE0001, e);
+                }
+                // NOTE(review): nothing in this evaluator closes the reader once the last hit has
+                // been returned; it is only released by the next init() call.
+
+                searcher = new IndexSearcher(reader);
+                analyzer = new CaseSensitiveAnalyzer();
+
+                parser = new CaseSensitiveQueryParser("item", analyzer);
+
+                // Query: every '/' becomes '.', prefixed by the field name and followed by '*'.
+                String queryString = elementPath.replaceAll("/", ".");
+                queryString = "item:" + queryString + "*";
+
+                // Field value parse() searches for, e.g. "a/b/c" becomes "a.b:c.element".
+                elementPath = elementPath.substring(0, lastslash) + ":" + elementPath.substring(lastslash + 1);
+                elementPath = elementPath.replaceAll("/", ".") + ".element";
+
+                TopDocs results;
+                try {
+                    query = parser.parse(queryString);
+
+                    //TODO: Right now it only returns 1000000 results
+                    results = searcher.search(query, 1000000);
+
+                } catch (Exception e) {
+                    // The scan cannot proceed, so do not keep the freshly opened reader around.
+                    try {
+                        reader.close();
+                    } catch (IOException closeFailure) {
+                        e.addSuppressed(closeFailure);
+                    }
+                    reader = null;
+                    // Keep the cause and report a real error code.
+                    throw new SystemException(ErrorCode.SYSE0001, e);
+                }
+
+                hits = results.scoreDocs;
+                indexLength = hits.length;
+
+            }
+
+            /**
+             * Rebuilds the current document: every stored field whose value equals the prepared
+             * element path is expanded through buildElement(), and the finished document is written
+             * to {@code abvsFileNode}.
+             *
+             * @throws IOException
+             *             wrapping any exception raised while rebuilding or writing the document
+             */
+            public void parse(ArrayBackedValueStorage abvsFileNode) throws IOException {
+                try {
+                    handler.startDocument();
+
+                    for (int position = 0; position < fields.size(); position++) {
+                        IndexableField candidate = fields.get(position);
+                        boolean isRequestedElement = candidate.stringValue().equals(elementPath);
+                        if (isRequestedElement) {
+                            buildElement(abvsFileNode, position);
+                        }
+                    }
+
+                    handler.endDocument();
+                    handler.writeDocument(abvsFileNode);
+                } catch (Exception e) {
+                    throw new IOException(e);
+                }
+            }
+
+            /**
+             * Emits SAX events for the field at {@code fieldNum}. The text after the last '.' of the
+             * field value is its type and the text between the first ':' and the last '.' its label.
+             * A "textnode" is emitted as characters; an "element" is emitted with its attributes and,
+             * recursively, with every following field that isChild() accepts. Other types emit nothing.
+             *
+             * @return the index of the last field consumed by this call
+             */
+            private int buildElement(ArrayBackedValueStorage abvsFileNode, int fieldNum) throws SAXException {
+                final IndexableField current = fields.get(fieldNum);
+                final String encoded = current.stringValue();
+                final String namespaceUri = "";
+
+                final int colonAt = encoded.indexOf(':');
+                final int dotAt = encoded.lastIndexOf('.');
+                final String kind = encoded.substring(dotAt + 1);
+                final String label = encoded.substring(colonAt + 1, dotAt);
+
+                int lastConsumed = fieldNum;
+
+                if (kind.equals("textnode")) {
+                    char[] text = label.toCharArray();
+                    handler.characters(text, 0, text.length);
+                } else if (kind.equals("element")) {
+                    List<String> names = new ArrayList<String>();
+                    List<String> values = new ArrayList<String>();
+                    List<String> uris = new ArrayList<String>();
+                    List<String> localNames = new ArrayList<String>();
+                    List<String> types = new ArrayList<String>();
+                    List<String> qNames = new ArrayList<String>();
+                    lastConsumed = findAttributeChildren(lastConsumed, names, values, uris, localNames, types,
+                            qNames);
+                    Attributes attributes = new IndexAttributes(names, values, uris, localNames, types, qNames);
+
+                    handler.startElement(namespaceUri, label, label, attributes);
+
+                    // Keep descending while the next field still belongs to this element.
+                    while (lastConsumed + 1 < fields.size() && isChild(fields.get(lastConsumed + 1), current)) {
+                        lastConsumed = buildElement(abvsFileNode, lastConsumed + 1);
+                    }
+
+                    handler.endElement(namespaceUri, label, label);
+                }
+                return lastConsumed;
+            }
+
+            /*
+             * Collects the attributes of the element at fieldnum. Fields are read in pairs starting
+             * right after the element: the first of each pair must satisfy isDirectChildAttribute()
+             * and supplies the name, the second supplies the value. The name is also added to the
+             * uri, local-name, type and qname lists. Returns the index of the last field consumed,
+             * or fieldnum when no attribute was found.
+             */
+            int findAttributeChildren(int fieldnum, List<String> n, List<String> v, List<String> u, List<String> l,
+                    List<String> t, List<String> q) {
+                int cursor = fieldnum + 1;
+                boolean sawAttribute = false;
+
+                for (; cursor < fields.size(); cursor += 2) {
+                    IndexableField candidate = fields.get(cursor);
+                    String encodedName = candidate.stringValue();
+                    String attributeName = encodedName.substring(encodedName.indexOf(':') + 1,
+                            encodedName.lastIndexOf('.'));
+
+                    if (!isDirectChildAttribute(candidate, fields.get(fieldnum))) {
+                        break;
+                    }
+
+                    sawAttribute = true;
+                    n.add(attributeName);
+                    // The field right after an attribute carries its value.
+                    String encodedValue = fields.get(cursor + 1).stringValue();
+                    String attributeValue = encodedValue.substring(encodedValue.indexOf(':') + 1,
+                            encodedValue.lastIndexOf('.'));
+                    v.add(attributeValue);
+                    u.add(attributeName);
+                    l.add(attributeName);
+                    t.add(attributeName);
+                    q.add(attributeName);
+                }
+
+                return sawAttribute ? cursor - 1 : fieldnum;
+            }
+
+            /*
+             * Tells whether child lies below adult. Both field values are cut at their last '.';
+             * the first ':' of the adult prefix is turned into '.', and the child prefix must start
+             * with that adult prefix followed by ':' or '.'.
+             */
+            boolean isChild(IndexableField child, IndexableField adult) {
+                final String childText = child.stringValue();
+                final String parentText = adult.stringValue();
+
+                final int childCut = childText.lastIndexOf('.');
+                final int parentCut = parentText.lastIndexOf('.');
+
+                final String childPrefix = childText.substring(0, childCut);
+                final String parentPrefix = parentText.substring(0, parentCut).replaceFirst(":", ".");
+
+                if (childPrefix.startsWith(parentPrefix + ":")) {
+                    return true;
+                }
+                return childPrefix.startsWith(parentPrefix + ".");
+            }
+
+            /*
+             * Tells whether child is an attribute field of adult: its last '.'-separated segment must
+             * be "attribute" and its prefix must start with the adult prefix (first ':' turned into
+             * '.') followed by ':'.
+             */
+            boolean isDirectChildAttribute(IndexableField child, IndexableField adult) {
+                final String childText = child.stringValue();
+                final String parentText = adult.stringValue();
+
+                final String childPrefix = childText.substring(0, childText.lastIndexOf('.'));
+                final String parentPrefix = parentText.substring(0, parentText.lastIndexOf('.')).replaceFirst(":",
+                        ".");
+
+                final String[] pieces = child.stringValue().split("\\.");
+                final String childKind = pieces[pieces.length - 1];
+
+                if (!childKind.equals("attribute")) {
+                    return false;
+                }
+                return childPrefix.startsWith(parentPrefix + ":");
+            }
+
+        };
+    }
+}
\ No newline at end of file

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorScalarEvaluatorFactory.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorScalarEvaluatorFactory.java
new file mode 100644
index 0000000..c3776d9
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorScalarEvaluatorFactory.java

@@ -0,0 +1,70 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.index;
+
+import java.io.DataInputStream;
+
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder;
+import org.apache.vxquery.exceptions.SystemException;
+import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentScalarEvaluator;
+import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentScalarEvaluatorFactory;
+import org.apache.vxquery.xmlparser.ITreeNodeIdProvider;
+import org.apache.vxquery.xmlparser.TreeNodeIdProvider;
+
+/**
+ * Factory for the scalar evaluator that builds an index by delegating to
+ * {@link IndexConstructorUtil#evaluate}. Per the original author's note, one Lucene doc is created
+ * per file.
+ */
+public class IndexConstructorScalarEvaluatorFactory extends AbstractTaggedValueArgumentScalarEvaluatorFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    public IndexConstructorScalarEvaluatorFactory(IScalarEvaluatorFactory[] args) {
+        super(args);
+    }
+
+    /**
+     * Allocates the buffers and helpers once per evaluator and returns an evaluator that passes them,
+     * together with its arguments, to {@link IndexConstructorUtil#evaluate}.
+     */
+    @Override
+    protected IScalarEvaluator createEvaluator(IHyracksTaskContext ctx, IScalarEvaluator[] args)
+            throws AlgebricksException {
+        // Scratch storage and pointables shared by every evaluate() call of this evaluator.
+        final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
+        final ArrayBackedValueStorage fileNodeStorage = new ArrayBackedValueStorage();
+        final UTF8StringPointable stringPointable = (UTF8StringPointable) UTF8StringPointable.FACTORY
+                .createPointable();
+        final TaggedValuePointable nodePointable = (TaggedValuePointable) TaggedValuePointable.FACTORY
+                .createPointable();
+        final SequenceBuilder sequenceBuilder = new SequenceBuilder();
+
+        // Stream pair used to decode string arguments.
+        final ByteBufferInputStream byteStream = new ByteBufferInputStream();
+        final DataInputStream dataStream = new DataInputStream(byteStream);
+
+        // Values taken from the task context.
+        final int partition = ctx.getTaskAttemptId().getTaskId().getPartition();
+        final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId();
+        final ITreeNodeIdProvider idProvider = new TreeNodeIdProvider((short) partition);
+
+        return new AbstractTaggedValueArgumentScalarEvaluator(args) {
+
+            @Override
+            protected void evaluate(TaggedValuePointable[] args, IPointable result) throws SystemException {
+                IndexConstructorUtil.evaluate(args, result, stringPointable, byteStream, dataStream,
+                        sequenceBuilder, resultStorage, idProvider, fileNodeStorage, nodePointable, false, nodeId);
+            }
+
+        };
+    }
+}

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java
new file mode 100644
index 0000000..7191827
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java

@@ -0,0 +1,144 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.index;
+
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.file.Paths;
+import java.util.Arrays;
+
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder;
+import org.apache.vxquery.datamodel.values.ValueTag;
+import org.apache.vxquery.exceptions.ErrorCode;
+import org.apache.vxquery.exceptions.SystemException;
+import org.apache.vxquery.index.IndexDocumentBuilder;
+import org.apache.vxquery.runtime.functions.util.FunctionHelper;
+import org.apache.vxquery.xmlparser.ITreeNodeIdProvider;
+import org.apache.vxquery.xmlparser.XMLParser;
+
+public class IndexConstructorUtil {
+    /**
+     * Builds a Lucene index in the folder named by args[1] from the XML files found under the folder
+     * named by args[0], then sets {@code result} to the sequence built in {@code abvs} (nothing in
+     * this class adds items to that sequence).
+     *
+     * The index is opened in CREATE mode, so an existing index in the folder is overwritten. If
+     * indexing fails the writer is rolled back, which closes it without committing, so the write
+     * lock is released and no partial index is committed.
+     *
+     * @param args
+     *            args[0] is the collection folder and args[1] the index folder; both must be tagged
+     *            as xs:string
+     * @param result
+     *            receives the contents of {@code abvs}
+     * @throws SystemException
+     *             FORG0006 when an argument is not a string; SYSE0001 when an argument cannot be
+     *             decoded or an I/O error occurs while indexing
+     * @throws RuntimeException
+     *             when the collection folder does not exist
+     */
+    public static void evaluate(TaggedValuePointable[] args, IPointable result, UTF8StringPointable stringp,
+            ByteBufferInputStream bbis, DataInputStream di, SequenceBuilder sb, ArrayBackedValueStorage abvs,
+            ITreeNodeIdProvider nodeIdProvider, ArrayBackedValueStorage abvsFileNode, TaggedValuePointable nodep,
+            boolean isElementPath, String nodeId) throws SystemException {
+        String collectionFolder;
+        String indexFolder;
+        TaggedValuePointable collectionTVP = args[0];
+        TaggedValuePointable indexTVP = args[1];
+
+        if (collectionTVP.getTag() != ValueTag.XS_STRING_TAG || indexTVP.getTag() != ValueTag.XS_STRING_TAG) {
+            throw new SystemException(ErrorCode.FORG0006);
+        }
+
+        try {
+            // Get the list of files.
+            collectionTVP.getValue(stringp);
+            bbis.setByteBuffer(ByteBuffer.wrap(Arrays.copyOfRange(stringp.getByteArray(), stringp.getStartOffset(),
+                    stringp.getLength() + stringp.getStartOffset())), 0);
+            collectionFolder = di.readUTF();
+
+            // Get the index folder
+            indexTVP.getValue(stringp);
+            bbis.setByteBuffer(ByteBuffer.wrap(Arrays.copyOfRange(stringp.getByteArray(), stringp.getStartOffset(),
+                    stringp.getLength() + stringp.getStartOffset())), 0);
+            indexFolder = di.readUTF();
+        } catch (IOException e) {
+            throw new SystemException(ErrorCode.SYSE0001, e);
+        }
+        File collectionDirectory = new File(collectionFolder);
+        if (!collectionDirectory.exists()) {
+            throw new RuntimeException("The collection directory (" + collectionFolder + ") does not exist.");
+        }
+
+        try {
+            abvs.reset();
+            sb.reset(abvs);
+
+            Analyzer analyzer = new CaseSensitiveAnalyzer();
+            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
+
+            // Create will overwrite the index everytime
+            iwc.setOpenMode(OpenMode.CREATE);
+
+            try (Directory dir = FSDirectory.open(Paths.get(indexFolder))) {
+                //Create an index writer
+                IndexWriter writer = new IndexWriter(dir, iwc);
+                boolean indexed = false;
+                try {
+                    //Add files to index
+                    indexXmlFiles(collectionDirectory, writer, isElementPath, nodep, abvsFileNode, nodeIdProvider,
+                            sb, bbis, di, nodeId);
+
+                    //This makes write slower but search faster.
+                    writer.forceMerge(1);
+
+                    writer.close();
+                    indexed = true;
+                } finally {
+                    if (!indexed) {
+                        // Close the writer without committing the half-built index. The failure
+                        // that brought us here is the one worth reporting, so a secondary
+                        // rollback failure is deliberately dropped.
+                        try {
+                            writer.rollback();
+                        } catch (IOException ignored) {
+                            // intentionally ignored, see above
+                        }
+                    }
+                }
+            }
+
+            sb.finish();
+            result.set(abvs);
+        } catch (IOException e) {
+            throw new SystemException(ErrorCode.SYSE0001, e);
+        }
+    }
+
+    /*
+     * Walks collectionDirectory recursively. Every entry whose path ends in .xml or .xml.gz is
+     * parsed into a document node in abvsFileNode and handed, together with the writer, to an
+     * IndexDocumentBuilder; other entries that are directories are descended into.
+     *
+     * Throws IOException when the entries of a directory cannot be listed (the path is not a
+     * directory or an I/O error occurred) or when reading a file fails.
+     */
+    public static void indexXmlFiles(File collectionDirectory, IndexWriter writer, boolean isElementPath,
+            TaggedValuePointable nodep, ArrayBackedValueStorage abvsFileNode, ITreeNodeIdProvider nodeIdProvider,
+            SequenceBuilder sb, ByteBufferInputStream bbis, DataInputStream di, String nodeId)
+            throws SystemException, IOException {
+        File[] entries = collectionDirectory.listFiles();
+        if (entries == null) {
+            // File.listFiles() returns null instead of throwing, which would otherwise surface as
+            // a NullPointerException in the loop below.
+            throw new IOException("Unable to list the files in (" + collectionDirectory.getPath() + ").");
+        }
+
+        for (File file : entries) {
+
+            if (readableXmlFile(file.getPath())) {
+                abvsFileNode.reset();
+                // Get the document node
+                XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId);
+                FunctionHelper.readInDocFromString(file.getPath(), bbis, di, abvsFileNode, parser);
+
+                nodep.set(abvsFileNode.getByteArray(), abvsFileNode.getStartOffset(), abvsFileNode.getLength());
+
+                //Add the document to the index
+                //Creates one lucene doc per file
+                IndexDocumentBuilder ibuilder = new IndexDocumentBuilder(nodep, writer);
+
+                // NOTE(review): presumably this is the call that adds the document to the writer;
+                // confirm in IndexDocumentBuilder.
+                ibuilder.printStart();
+
+            } else if (file.isDirectory()) {
+                // Consider all XML file in sub directories.
+                indexXmlFiles(file, writer, isElementPath, nodep, abvsFileNode, nodeIdProvider, sb, bbis, di, nodeId);
+            }
+        }
+    }
+
+    /**
+     * Tells whether {@code path} ends, ignoring case, in ".xml" or ".xml.gz".
+     */
+    public static boolean readableXmlFile(String path) {
+        final String lowered = path.toLowerCase();
+        if (lowered.endsWith(".xml")) {
+            return true;
+        }
+        return lowered.endsWith(".xml.gz");
+    }
+
+}
\ No newline at end of file

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
index d394bbc..b6668ba 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java

@@ -480,7 +480,7 @@
 
     public static boolean compareTaggedValues(AbstractValueComparisonOperation aOp, TaggedValuePointable tvp1,
             TaggedValuePointable tvp2, DynamicContext dCtx, TypedPointables tp1, TypedPointables tp2)
-            throws SystemException {
+                    throws SystemException {
         int tid1 = getBaseTypeForComparisons(tvp1.getTag());
         int tid2 = getBaseTypeForComparisons(tvp2.getTag());
 
@@ -1217,6 +1217,11 @@
         } catch (SystemException e) {
             throw new HyracksDataException(e);
         }
+        readInDocFromString(fName, bbis, di, abvs, parser);
+    }
+
+    public static void readInDocFromString(String fName, ByteBufferInputStream bbis, DataInputStream di,
+            ArrayBackedValueStorage abvs, XMLParser parser) throws HyracksDataException {
         if (!fName.contains("hdfs:/")) {
             File file = new File(fName);
             if (file.exists()) {

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
index 03a125b..846c27b 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java

@@ -51,40 +51,41 @@
 
 public class SAXContentHandler implements ContentHandler, LexicalHandler {
     // XML node builders
-    private final AttributeNodeBuilder anb;
-    private final CommentNodeBuilder cnb;
-    private final DictionaryBuilder db;
-    private final DocumentNodeBuilder docb;
-    private final PINodeBuilder pinb;
-    private final TextNodeBuilder tnb;
-    private final UTF8StringBuilder utf8b;
-    private final List<ElementNodeBuilder> enbStack;
-    private final List<ElementNodeBuilder> freeENBList;
+    protected final AttributeNodeBuilder anb;
+    protected final CommentNodeBuilder cnb;
+    protected final DictionaryBuilder db;
+    protected final DocumentNodeBuilder docb;
+    protected final PINodeBuilder pinb;
+    protected final TextNodeBuilder tnb;
+    protected final UTF8StringBuilder utf8b;
+    protected final List<ElementNodeBuilder> enbStack;
+    protected final List<ElementNodeBuilder> freeENBList;
+    protected boolean isIndexHandler;
 
     // Frame writing variables
-    private IFrameFieldAppender appender;
+    protected IFrameFieldAppender appender;
     private int tupleIndex;
     private IFrameWriter writer;
 
     // Element writing and path step variables
-    private boolean skipping;
+    protected boolean skipping;
     private String[] childLocalName = null;
     private String[] childUri = null;
     private boolean[] subElement = null;
     private final TaggedValuePointable tvp;
 
     // Basic tracking and setting variables
-    private final boolean attachTypes;
-    private final boolean createNodeIds;
+    protected final boolean attachTypes;
+    protected final boolean createNodeIds;
     private int depth;
-    private final ArrayBackedValueStorage resultABVS;
-    private boolean pendingText;
-    private int nodeIdCounter;
-    private final ITreeNodeIdProvider nodeIdProvider;
-    private final ArrayBackedValueStorage tempABVS;
+    protected final ArrayBackedValueStorage resultABVS;
+    protected boolean pendingText;
+    protected int nodeIdCounter;
+    protected final ITreeNodeIdProvider nodeIdProvider;
+    protected final ArrayBackedValueStorage tempABVS;
     private final ArrayBackedValueStorage textABVS;
 
-    public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider) {
+    public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider, boolean isIndexHandler) {
         // XML node builders
         anb = new AttributeNodeBuilder();
         cnb = new CommentNodeBuilder();
@@ -110,11 +111,16 @@
         this.nodeIdProvider = nodeIdProvider;
         tempABVS = new ArrayBackedValueStorage();
         textABVS = new ArrayBackedValueStorage();
+        this.isIndexHandler = isIndexHandler;
+        if (isIndexHandler) {
+            this.appender = null;
+            this.skipping = false;
+        }
     }
 
     public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider, IFrameFieldAppender appender,
             List<SequenceType> childSequenceTypes) {
-        this(attachTypes, nodeIdProvider);
+        this(attachTypes, nodeIdProvider, false);
 
         // Frame writing variables
         this.appender = appender;
@@ -189,16 +195,21 @@
             return;
         }
         try {
-            boolean nonSkipped = foundFirstNonSkippedElement();
+            boolean nonSkipped = false;
+            if (!isIndexHandler) {
+                nonSkipped = foundFirstNonSkippedElement();
+            }
             flushText();
             ElementNodeBuilder enb = enbStack.remove(enbStack.size() - 1);
             enb.endChildrenChunk();
             endChildInParent(enb, nonSkipped);
             freeENB(enb);
-            if (nonSkipped) {
-                writeElement();
+            if (!isIndexHandler) {
+                if (nonSkipped) {
+                    writeElement();
+                }
+                endElementChildPathStep();
             }
-            endElementChildPathStep();
         } catch (IOException e) {
             e.printStackTrace();
             throw new SAXException(e);
@@ -248,7 +259,7 @@
 
     @Override
     public void startDocument() throws SAXException {
-        if (subElement == null) {
+        if (isIndexHandler || subElement == null) {
             skipping = false;
         }
         db.reset();
@@ -305,7 +316,10 @@
     @Override
     public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException {
         ++depth;
-        boolean start = startElementChildPathStep(uri, localName);
+        boolean start = false;
+        if (!isIndexHandler) {
+            start = startElementChildPathStep(uri, localName);
+        }
 
         if (skipping) {
             return;
@@ -392,7 +406,7 @@
         }
     }
 
-    private void flushText() throws IOException {
+    protected void flushText() throws IOException {
         if (pendingText) {
             peekENBStackTop().startChild(tnb);
             if (createNodeIds) {
@@ -471,7 +485,7 @@
         out.write(resultABVS.getByteArray(), resultABVS.getStartOffset(), resultABVS.getLength());
     }
 
-    private ElementNodeBuilder createENB() {
+    protected ElementNodeBuilder createENB() {
         if (freeENBList.isEmpty()) {
             return new ElementNodeBuilder();
         }
@@ -482,7 +496,7 @@
         freeENBList.add(enb);
     }
 
-    private ElementNodeBuilder peekENBStackTop() {
+    protected ElementNodeBuilder peekENBStackTop() {
         return enbStack.get(enbStack.size() - 1);
     }
 

diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
index a62a26c..34d7ba9 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java

@@ -57,7 +57,7 @@
         try {
             parser = XMLReaderFactory.createXMLReader();
             if (appender == null) {
-                handler = new SAXContentHandler(attachTypes, idProvider);
+                handler = new SAXContentHandler(attachTypes, idProvider, false);
             } else {
                 List<SequenceType> childSequenceTypes = new ArrayList<SequenceType>();
                 for (int typeCode : childSeq) {

diff --git a/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryTest.java b/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryTest.java
index 4d0ddc0..11f7eb2 100644
--- a/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryTest.java
+++ b/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryTest.java

@@ -1,25 +1,26 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.vxquery.xtest;
 
 import java.io.File;
 import java.io.IOException;
 import java.util.Collection;
 
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -30,9 +31,10 @@
 @RunWith(Parameterized.class)
 public class VXQueryTest extends AbstractXQueryTest {
     private static MiniDFS dfs;
+    private final static String TMP = "target/tmp";
 
-    private static String VXQUERY_CATALOG = StringUtils.join(new String[] { "src", "test", "resources",
-            "VXQueryCatalog.xml" }, File.separator);
+    private static String VXQUERY_CATALOG = StringUtils
+            .join(new String[] { "src", "test", "resources", "VXQueryCatalog.xml" }, File.separator);
 
     public VXQueryTest(TestCase tc) throws Exception {
         super(tc);
@@ -57,7 +59,12 @@
     }
 
     @BeforeClass
-    public static void setupHDFS() {
+    public static void setup() throws IOException {
+        File tmp = new File(TMP);
+        if (tmp.exists()) {
+            FileUtils.deleteDirectory(tmp);
+        }
+        new File(TMP.concat("/indexFolder")).mkdirs();
         dfs = new MiniDFS();
         try {
             dfs.startHDFS();
@@ -67,7 +74,11 @@
     }
 
     @AfterClass
-    public static void shutdownHDFS() {
+    public static void shutdown() throws IOException {
+        File tmp = new File(TMP);
+        if (tmp.exists()) {
+            FileUtils.deleteDirectory(tmp);
+        }
         dfs.shutdownHDFS();
     }
 

diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/createIndex.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/createIndex.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/createIndex.txt


diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex1.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex1.txt
new file mode 100644
index 0000000..baf9dca
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex1.txt

@@ -0,0 +1,2 @@
+<data><date>2003-03-03T00:00:00.000</date><dataType>TMIN</dataType><station>GHCND:AS000000003</station><value>13.75</value><attributes><attribute/><attribute/><attribute>a</attribute><attribute/></attributes></data>
+<data><date>2003-03-03T00:00:00.000</date><dataType>TMAX</dataType><station>GHCND:AS000000003</station><value>33</value><attributes><attribute/><attribute/><attribute>a</attribute></attributes></data>
\ No newline at end of file

diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex2.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex2.txt
new file mode 100644
index 0000000..ef8dde4
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex2.txt

@@ -0,0 +1 @@
+<data><date>2001-01-01T00:00:00.000</date><dataType>AWND</dataType><station>GHCND:US000000001</station><value>1000</value><attributes><attribute/><attribute/><attribute>a</attribute></attributes></data>
\ No newline at end of file

diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex3.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex3.txt
new file mode 100644
index 0000000..d8263ee
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex3.txt

@@ -0,0 +1 @@
+2
\ No newline at end of file

diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex4.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex4.txt
new file mode 100644
index 0000000..f30101c
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex4.txt

@@ -0,0 +1 @@
+3.3
\ No newline at end of file

diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex5.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex5.txt
new file mode 100644
index 0000000..c84c360
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex5.txt

@@ -0,0 +1,3 @@
+<data><date>2002-02-02T00:00:00.000</date><dataType>TMIN</dataType><station>GHCND:US000000002</station><value>12.5</value><attributes><attribute/><attribute/><attribute>a</attribute><attribute/></attributes></data>
+<data><date>2002-02-02T00:00:00.000</date><dataType>TMAX</dataType><station>GHCND:US000000002</station><value>32</value><attributes><attribute/><attribute/><attribute>a</attribute><attribute/></attributes></data>
+<data><date>2002-02-02T00:00:00.000</date><dataType>PRCP</dataType><station>GHCND:US000000002</station><value>20</value><attributes><attribute/><attribute/><attribute>a</attribute></attributes></data>
\ No newline at end of file

diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex6.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex6.txt
new file mode 100644
index 0000000..9abedff
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex6.txt

@@ -0,0 +1,2 @@
+<station><id>GHCND:US000000001</id><displayName>Station 1</displayName><latitude>10.000</latitude><longitude>-10.000</longitude><elevation>1000.0</elevation><locationLabels><type>ST</type><id>FIPS:1</id><displayName>State 1</displayName></locationLabels><locationLabels><type>CNTY</type><id>FIPS:-9999</id><displayName>County 1</displayName></locationLabels><locationLabels><type>CNTRY</type><id>FIPS:US</id><displayName/></locationLabels></station>
+<station><id>GHCND:US000000002</id><displayName>Station 2</displayName><latitude>20.000</latitude><longitude>-20.000</longitude><elevation>2000.0</elevation><locationLabels><type>ST</type><id>FIPS:1</id><displayName>State 1</displayName></locationLabels><locationLabels><type>CNTY</type><id>FIPS:-9999</id><displayName>County 2</displayName></locationLabels><locationLabels><type>CNTRY</type><id>FIPS:US</id><displayName/></locationLabels></station>
\ No newline at end of file

diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex7.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex7.txt
new file mode 100644
index 0000000..c84c360
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/useIndex7.txt

@@ -0,0 +1,3 @@
+<data><date>2002-02-02T00:00:00.000</date><dataType>TMIN</dataType><station>GHCND:US000000002</station><value>12.5</value><attributes><attribute/><attribute/><attribute>a</attribute><attribute/></attributes></data>
+<data><date>2002-02-02T00:00:00.000</date><dataType>TMAX</dataType><station>GHCND:US000000002</station><value>32</value><attributes><attribute/><attribute/><attribute>a</attribute><attribute/></attributes></data>
+<data><date>2002-02-02T00:00:00.000</date><dataType>PRCP</dataType><station>GHCND:US000000002</station><value>20</value><attributes><attribute/><attribute/><attribute>a</attribute></attributes></data>
\ No newline at end of file

diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/createIndex.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/createIndex.xq
new file mode 100644
index 0000000..f34ac4c
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/createIndex.xq

@@ -0,0 +1,20 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+   
+(: Build Lucene Index :)
+build-index-on-collection( "src/test/resources/TestSources/ghcnd", "target/tmp/indexFolder")
+   
\ No newline at end of file

diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex1.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex1.xq
new file mode 100644
index 0000000..1635f61
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex1.xq

@@ -0,0 +1,25 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+   
+(: Search Lucene Index :)
+for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data
+let $datetime := xs:dateTime(fn:data($r/date))
+where $r/station eq "GHCND:AS000000003" 
+    and fn:year-from-dateTime($datetime) ge 2000
+    and fn:month-from-dateTime($datetime) eq 3 
+    and fn:day-from-dateTime($datetime) eq 3
+return $r
\ No newline at end of file

diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex2.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex2.xq
new file mode 100644
index 0000000..bf19ee9
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex2.xq

@@ -0,0 +1,24 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: Search Lucene Index :)
+(: Find all readings for hurricane force wind or extreme wind warnings.       :)
+(: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744     :)
+(: meters per second). (Wind value is in tenths of a meter per second)        :)
+for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data
+where $r/dataType eq "AWND" and xs:decimal($r/value) gt 491.744
+return $r

diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex3.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex3.xq
new file mode 100644
index 0000000..28cf019
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex3.xq

@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: Search Lucene Index :)
+(: Find the annual precipitation (PRCP) for Seattle using the airport         :)
+(: station (US000000002) for 2002.                                            :)
+fn:sum(
+    for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data
+    where $r/station eq "GHCND:US000000002" 
+        and $r/dataType eq "PRCP" 
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2002
+    return $r/value
+) div 10

diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex4.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex4.xq
new file mode 100644
index 0000000..2b75cf4
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex4.xq

@@ -0,0 +1,24 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: Search Lucene Index :)
+(: Find the highest recorded temperature (TMAX) in Celsius.                   :)
+fn:max(
+    for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data
+    where $r/dataType eq "TMAX"
+    return $r/value
+) div 10

diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex5.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex5.xq
new file mode 100644
index 0000000..e83484a
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex5.xq

@@ -0,0 +1,23 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: Search Lucene Index :)
+(: Find all the weather readings recorded on a specific day, 2002-02-02,      :)
+(: regardless of station.                                                     :)
+for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data
+where xs:dateTime(fn:data($r/date)) eq xs:dateTime("2002-02-02T00:00:00.000")
+return $r

diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex6.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex6.xq
new file mode 100644
index 0000000..04f6672
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex6.xq

@@ -0,0 +1,23 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: Search Lucene Index :)
+(: Find all the stations that have a state (ST) location label whose          :)
+(: display name is "State 1" (compared case-insensitively).                   :)
+for $s in collection-from-index("target/tmp/indexFolder", "/stationCollection/station")/station
+where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "STATE 1"))
+return $s

diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex7.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex7.xq
new file mode 100644
index 0000000..e471baa
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/useIndex7.xq

@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: Search Lucene Index :)
+(: Find all the weather readings taken on a specific day, 2002-02-02, at      :)
+(: stations whose state (ST) location label is "State 1".                     :)
+for $s in collection-from-index("target/tmp/indexFolder", "/stationCollection/station")/station
+for $r in collection-from-index("target/tmp/indexFolder", "/dataCollection/data")/data
+    
+where $s/id eq $r/station 
+    and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "STATE 1"))
+    and xs:dateTime(fn:data($r/date)) eq xs:dateTime("2002-02-02T00:00:00.000")
+return $r

diff --git a/vxquery-xtest/src/test/resources/VXQueryCatalog.xml b/vxquery-xtest/src/test/resources/VXQueryCatalog.xml
index f75ce49..414601e 100644
--- a/vxquery-xtest/src/test/resources/VXQueryCatalog.xml
+++ b/vxquery-xtest/src/test/resources/VXQueryCatalog.xml

@@ -42,6 +42,8 @@
 
 <!ENTITY HDFSAggregateQueries SYSTEM "cat/HDFSAggregateQueries.xml">
 
+<!ENTITY IndexingQueries SYSTEM "cat/IndexingQueries.xml">
+
 ]>
 <test-suite xmlns="http://www.w3.org/2005/02/query-test-XQTSCatalog"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
@@ -208,4 +210,17 @@
         &HDFSAggregateQueries;
       </test-group>
    </test-group>
+   <test-group name="IndexingQueries" featureOwner="Steven Jacobs">
+      <GroupInfo>
+         <title>Indexing Queries</title>
+         <description/>
+      </GroupInfo>
+      <test-group name="IndexingTests" featureOwner="Steven Jacobs">
+         <GroupInfo>
+            <title>Indexing Execution Tests</title>
+            <description/>
+         </GroupInfo>
+        &IndexingQueries;
+      </test-group>
+   </test-group>
 </test-suite>
\ No newline at end of file

diff --git a/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml b/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml
new file mode 100644
index 0000000..1f8291d
--- /dev/null
+++ b/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml

@@ -0,0 +1,63 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<test-group xmlns="http://www.w3.org/2005/02/query-test-XQTSCatalog" name="IndexingQueries" featureOwner="VXQuery">
+   <GroupInfo>
+      <title>Indexing</title>
+      <description/>
+   </GroupInfo>
+   <test-case name="create-index" FilePath="Indexing/" Creator="Steven Jacobs">
+      <description>Create Lucene Index from Collection.</description>
+      <query name="createIndex" date="2016-05-26"/>
+      <output-file compare="Text">createIndex.txt</output-file>
+   </test-case>
+   <test-case name="use-index-1" FilePath="Indexing/" Creator="Steven Jacobs">
+      <description>Get Collection From Lucene Index</description>
+      <query name="useIndex1" date="2016-05-26"/>
+      <output-file compare="Text">useIndex1.txt</output-file>
+   </test-case>
+   <test-case name="use-index-2" FilePath="Indexing/" Creator="Steven Jacobs">
+      <description>Get Collection From Lucene Index</description>
+      <query name="useIndex2" date="2016-05-26"/>
+      <output-file compare="Text">useIndex2.txt</output-file>
+   </test-case>
+   <test-case name="use-index-3" FilePath="Indexing/" Creator="Steven Jacobs">
+      <description>Get Collection From Lucene Index</description>
+      <query name="useIndex3" date="2016-05-26"/>
+      <output-file compare="Text">useIndex3.txt</output-file>
+   </test-case>
+   <test-case name="use-index-4" FilePath="Indexing/" Creator="Steven Jacobs">
+      <description>Get Collection From Lucene Index</description>
+      <query name="useIndex4" date="2016-05-26"/>
+      <output-file compare="Text">useIndex4.txt</output-file>
+   </test-case>
+   <test-case name="use-index-5" FilePath="Indexing/" Creator="Steven Jacobs">
+      <description>Get Collection From Lucene Index</description>
+      <query name="useIndex5" date="2016-05-26"/>
+      <output-file compare="Text">useIndex5.txt</output-file>
+   </test-case>
+   <test-case name="use-index-6" FilePath="Indexing/" Creator="Steven Jacobs">
+      <description>Get Collection From Lucene Index</description>
+      <query name="useIndex6" date="2016-05-26"/>
+      <output-file compare="Text">useIndex6.txt</output-file>
+   </test-case>
+   <test-case name="use-index-7" FilePath="Indexing/" Creator="Steven Jacobs">
+      <description>Get Collection From Lucene Index</description>
+      <query name="useIndex7" date="2016-05-26"/>
+      <output-file compare="Text">useIndex7.txt</output-file>
+   </test-case>
+</test-group>
commit	1f623b1664fb5a86dae8b6be2497d822f80505d9	[log] [tgz]
author	Steven Glenn Jacobs <sjaco002@ucr.edu>	Fri May 27 12:42:47 2016 -0700
committer	Steven Glenn Jacobs <sjaco002@ucr.edu>	Fri May 27 12:42:47 2016 -0700
tree	e4bc56d292bafa8fd0012359c9bb44aa53b9d31d
parent	2b59326602b3f7d7262f3241971b786dde152241 [diff]