[UIMA-2318] - Define automatic distribution of a closed term set over multiple fields in one field definition; applied Erik Faessler's patch

git-svn-id: https://svn.apache.org/repos/asf/uima/addons/trunk@1505647 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/Lucas/desc/lucas.xsd b/Lucas/desc/lucas.xsd
index 72469df..a7b0af9 100644
--- a/Lucas/desc/lucas.xsd
+++ b/Lucas/desc/lucas.xsd
@@ -1,138 +1,156 @@
 <?xml version="1.0"?>
 
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	you under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
 
 <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
-		   elementFormDefault="qualified">
+	elementFormDefault="qualified">
 
 	<xs:simpleType name="indexType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="yes"/>
-    		<xs:enumeration value="no"/>
-    		<xs:enumeration value="no_norms"/>
-    		<xs:enumeration value="no_tf"/>
-    		<xs:enumeration value="no_norms_tf"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="yes" />
+			<xs:enumeration value="no" />
+			<xs:enumeration value="no_norms" />
+			<xs:enumeration value="no_tf" />
+			<xs:enumeration value="no_norms_tf" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="termVectorType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="no"/>
-    		<xs:enumeration value="positions"/>
-    		<xs:enumeration value="offsets"/>
-    		<xs:enumeration value="positions_offsets"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="no" />
+			<xs:enumeration value="positions" />
+			<xs:enumeration value="offsets" />
+			<xs:enumeration value="positions_offsets" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="storedType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="yes"/>
-    		<xs:enumeration value="no"/>
-    		<xs:enumeration value="compress"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="yes" />
+			<xs:enumeration value="no" />
+			<xs:enumeration value="compress" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="tokenizerType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="cas"/>
-    		<xs:enumeration value="whitespace"/>
-    		<xs:enumeration value="standard"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="cas" />
+			<xs:enumeration value="whitespace" />
+			<xs:enumeration value="standard" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="positionType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="first"/>
-    		<xs:enumeration value="last"/>
-  		</xs:restriction>
-	</xs:simpleType>	
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="first" />
+			<xs:enumeration value="last" />
+		</xs:restriction>
+	</xs:simpleType>
 	
-	<xs:element name="feature">
+	<xs:simpleType name="generateCoverFieldNameType">
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="append" />
+			<xs:enumeration value="prepend" />
+			<xs:enumeration value="replace" />
+		</xs:restriction>
+	</xs:simpleType>
+
+	<xs:element name="filters">
 		<xs:complexType>
-			<xs:attribute name="name" type="xs:string"/>
-			<xs:attribute name="uppercase" type="xs:boolean"/>
-			<xs:attribute name="lowercase" type="xs:boolean"/>
-			<xs:attribute name="numberFormat" type="xs:string"/>
+			<xs:sequence>
+				<xs:element ref="filter" minOccurs="1" maxOccurs="unbounded" />
+			</xs:sequence>
 		</xs:complexType>
 	</xs:element>
-	
+
+	<xs:element name="filter">
+		<xs:complexType>
+			<xs:attribute name="className" type="xs:string" />
+			<xs:attribute name="factoryClassName" type="xs:string" />
+			<xs:attribute name="reuseFactory" type="xs:boolean" />
+			<xs:attribute name="name" type="xs:string" />
+			<xs:anyAttribute processContents="lax" />
+		</xs:complexType>
+	</xs:element>
+
+	<xs:element name="features">
+		<xs:complexType>
+			<xs:sequence>
+				<xs:element ref="feature" minOccurs="1" maxOccurs="unbounded" />
+			</xs:sequence>
+		</xs:complexType>
+	</xs:element>
+
+	<xs:element name="feature">
+		<xs:complexType>
+			<xs:attribute name="name" type="xs:string" />
+			<xs:attribute name="numberFormat" type="xs:string" />
+		</xs:complexType>
+	</xs:element>
+
+	<xs:element name="annotations">
+		<xs:complexType>
+			<xs:sequence>
+				<xs:element ref="annotation" minOccurs="1" maxOccurs="unbounded" />
+			</xs:sequence>
+		</xs:complexType>
+	</xs:element>
+
 	<xs:element name="annotation">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="feature" minOccurs="0" maxOccurs="unbounded"/>
-			</xs:sequence>				
-			<xs:attribute name="type" type="xs:string"/>
-			<xs:attribute name="sofa" type="xs:string"/>
-			<xs:attribute name="featurePath" type="xs:string"/>
-			<xs:attribute name="concatString" type="xs:string"/>
-			<xs:attribute name="splittString" type="xs:string"/>
-			<xs:attribute name="prefix" type="xs:string"/>
-			<xs:attribute name="uppercase" type="xs:boolean"/>
-			<xs:attribute name="lowercase" type="xs:boolean"/>
-			<xs:attribute name="stopwordRemove" type="xs:boolean"/>
-			<xs:attribute name="position" type="positionType"/>
-			<xs:attribute name="addHypernyms" type="xs:boolean"/>
-			<xs:attribute name="mappingFile" type="xs:string"/>
-			<xs:attribute name="snowballFilter" type="xs:string"/>
-			<xs:attribute name="unique" type="xs:boolean"/>
-			<xs:attribute name="tokenizer" type="tokenizerType"/>			
+				<xs:element ref="filters" minOccurs="0" maxOccurs="1" />
+				<xs:element ref="features" minOccurs="0" maxOccurs="1" />
+			</xs:sequence>
+			<xs:attribute name="type" type="xs:string" />
+			<xs:attribute name="sofa" type="xs:string" />
+			<xs:attribute name="featurePath" type="xs:string" />
+			<xs:attribute name="tokenizer" type="tokenizerType" />
+			<xs:attribute name="featureValueDelimiterString" type="xs:string" />
+		</xs:complexType>
+	</xs:element>
+
+	<xs:element name="termSetCoverDefinition">
+		<xs:complexType>
+			<xs:attribute name="coverDefinitionFile" type="xs:string" />
+			<xs:attribute name="generateFieldNameMethod" type="generateCoverFieldNameType" />
+			<xs:attribute name="ignoreCaseOfSelectedTerms" type="xs:boolean" />
 		</xs:complexType>
 	</xs:element>
 
 	<xs:element name="field">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="annotation" maxOccurs="unbounded"/>
-			</xs:sequence>				
-			<xs:attribute name="name" type="xs:string"/>
-			<xs:attribute name="index" type="indexType"/>
-			<xs:attribute name="termVector" type="termVectorType"/>
-			<xs:attribute name="delimiter" type="xs:string"/>
-			<xs:attribute name="stored" type="storedType"/>
-			<xs:attribute name="merge" type="xs:boolean"/>			
+				<xs:element ref="termSetCoverDefinition" minOccurs="0"
+					maxOccurs="1" />
+				<xs:element ref="filters" minOccurs="0" maxOccurs="1" />
+				<xs:element ref="annotations" minOccurs="1" maxOccurs="1" />
+			</xs:sequence>
+			<xs:attribute name="name" type="xs:string" />
+			<xs:attribute name="index" type="indexType" />
+			<xs:attribute name="termVector" type="termVectorType" />
+			<xs:attribute name="delimiter" type="xs:string" />
+			<xs:attribute name="stored" type="storedType" />
+			<xs:attribute name="merge" type="xs:boolean" />
+			<xs:attribute name="unique" type="xs:boolean" />
+			<xs:attribute name="coverFile" type="xs:string" />
 		</xs:complexType>
 	</xs:element>
-	
+
 	<xs:element name="fields">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="field" maxOccurs="unbounded"/>
+				<xs:element ref="field" maxOccurs="unbounded" />
 			</xs:sequence>
 		</xs:complexType>
 	</xs:element>
diff --git a/Lucas/src/docbook/LuceneCASConsumerUserGuide.xml b/Lucas/src/docbook/LuceneCASConsumerUserGuide.xml
index 56f1519..d2a5160 100644
--- a/Lucas/src/docbook/LuceneCASConsumerUserGuide.xml
+++ b/Lucas/src/docbook/LuceneCASConsumerUserGuide.xml
@@ -1,23 +1,15 @@
 <?xml version="1.0" encoding="UTF-8"?>
 
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	you under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
 
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
 "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd" [
@@ -58,7 +50,7 @@
 			filtering is configurable
 			by an XML file, whereas the index writer is
 			configured by a properties
-			file.   
+			file.
 		</para>
 		<para>
 			To use Lucas, at first a mapping file must be created. You have
@@ -77,7 +69,7 @@
 			further
 			processing. Its also possible to deploy your own
 			token
-			filters. 
+			filters.
 		</para>
 		<para>
 			Lucas can run in multiple deployment scenarios where different
@@ -117,25 +109,25 @@
 				knows
 				three different
 				sources of Lucene token values:
-				</para>
+			</para>
 			<itemizedlist>
 				<listitem>
 					<para>
-						The covered text of a annotation object. 
-					 </para>
+						The covered text of an annotation object.
+					</para>
 				</listitem>
 				<listitem>
 					<para>
-						One or more feature values of a annotation object. 
-					 </para>
+						One or more feature values of an annotation object.
+					</para>
 				</listitem>
 				<listitem>
 					<para>
 						One or more feature values of a feature structure directly
 						or
 						indirectly referenced
-						by an annotation object. 
-					 </para>
+						by an annotation object.
+					</para>
 				</listitem>
 			</itemizedlist>
 			<para>
@@ -400,7 +392,7 @@
 				to the merged or concatenated field token stream as whole.
 				The following
 				example demonstrates how token filters are defined in
-				the mapping file. 
+				the mapping file.
 			</para>
 			<programlisting><![CDATA[<fields>
   <field name=“text” index=“yes” merge=“true”>
@@ -444,8 +436,8 @@
 			</para>
 			<section id="sandbox.luceneCasConsumer.mapping.tokenfilters.selfdefined">
 				<title>
-					Deploying your own Token Filters 
-			</title>
+					Deploying your own Token Filters
+				</title>
 				<para>
 					For scenarios where the built-in token filters were not
 					sufficient,
@@ -460,8 +452,8 @@
 					parameter. The next example shows how such a
 					token
 					filter is
-					referenced in the mapping file.  				
-			</para>
+					referenced in the mapping file.
+				</para>
 				<programlisting><![CDATA[<fields>
   <field name=“text” index=“yes”>
 	<annotations>
@@ -531,8 +523,8 @@
 					name and a reuse attribute.
 					The example below demonstrates how we can
 					reuse a factory
-					instance. 
-		</para>
+					instance.
+				</para>
 				<programlisting><![CDATA[<fields>
   <field name=“text” index=“yes”>
 	<annotations>
@@ -551,6 +543,156 @@
 </fields>]]></programlisting>
 			</section>
 		</section>
+		<section id="sandbox.luceneCasConsumer.mapping.termcover">
+			<title>Defining term covers</title>
+			<para>
+				When defining a normal field in the ways described in the above
+				sections, all terms of the term set <emphasis>T</emphasis>
+				resulting from the processing defined by the
+				<emphasis>annotation</emphasis> and <emphasis>filter</emphasis>
+				elements are added to the respective field. It is also possible to
+				automatically distribute these terms across multiple, dynamically
+				created fields. Each term may be included in zero or more fields.
+				Which term is added to which field(s) is defined by a
+				<emphasis>termCoverDefinition</emphasis> file. The idea is that the
+				whole term set <emphasis>T</emphasis> is
+				<emphasis>covered</emphasis> by several subsets
+				<emphasis>S1,S2,...,SN</emphasis>, where each subset corresponds
+				to a field containing all terms of that subset. The result is not
+				necessarily a partition, that is, one term may be assigned to
+				multiple fields. Furthermore, to keep true to the notion of a
+				<emphasis>cover</emphasis>, terms which don't belong to any subset
+				are not considered to belong to the field definition at all and
+				would be filtered out anyway (as an assumption; this is
+				theoretically motivated and has no practical consequences here).
+			</para>
+			<para>
+				This mechanism is useful whenever the
+				<emphasis>token source</emphasis> of a field emits tokens (and
+				thus, eventually, terms) which the user wishes to assign to
+				different categories, expressing this categorization by one field
+				per category. Consider a shop system as an example. An annotation
+				type <emphasis>de.julielab.types.ArticleName</emphasis> would be
+				annotated in <emphasis>CAS</emphasis> objects. Among the text
+				snippets annotated this way one would find, for example,
+				<emphasis>light bulb</emphasis>,
+				<emphasis>electric shaver</emphasis> and
+				<emphasis>smartphone</emphasis> (the three terms are considered to
+				be article names for this example, even if they are chosen
+				generally enough to be categories themselves). The shop system
+				would have, among others, three article categories
+				<emphasis>electronics</emphasis>,
+				<emphasis>sanitaryArticles</emphasis> and
+				<emphasis>computers</emphasis>. The goal is to assign the article
+				names to the fields corresponding to their categories. Since this
+				information is not given implicitly by different annotation
+				objects (the assumption is that there are far too many categories,
+				which could even change over time; this would make maintaining the
+				type system rather tedious), an explicit definition must be
+				provided. This is achieved using a
+				<emphasis>termCoverDefinitionFile</emphasis>, which must have the
+				following format:
+				<informalequation>
+					<mediaobject>
+						<textobject>
+							<phrase>
+								&lt;term&gt;=&lt;S1&gt;|&lt;S2&gt;|...|&lt;SN&gt;
+							</phrase>
+						</textobject>
+					</mediaobject>
+				</informalequation>
+				That is, one term per line; a term's categories are assigned after
+				an = sign, and multiple categories are separated by the |
+				character. An example file would read as follows:
+				<programlisting><![CDATA[light bulb=electronics
+electric shaver=electronics|sanitaryArticles
+smartphone=electronics|computers]]>
+				</programlisting>
+			</para>
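+			<para>
+				To illustrate the file format, the following sketch shows one way
+				such a definition could be read into a map from each term to the
+				set of cover subsets it belongs to. It is purely illustrative: the
+				class and method names are made up, and this is not the parsing
+				code used by Lucas itself.
+				<programlisting><![CDATA[import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+public class CoverDefinitionSketch {
+  // reads lines of the form <term>=<S1>|<S2>|...|<SN> into a map from
+  // each term to the set of cover subsets (categories) it belongs to
+  public static Map<String, Set<String>> readCoverDefinition(String path)
+      throws IOException {
+    Map<String, Set<String>> termToSubsets = new HashMap<String, Set<String>>();
+    BufferedReader reader = new BufferedReader(new FileReader(path));
+    try {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        int eq = line.indexOf('=');
+        if (eq < 0)
+          continue; // skip empty or malformed lines
+        String term = line.substring(0, eq);
+        // multiple categories are separated by the '|' character
+        String[] subsets = line.substring(eq + 1).split("\\|");
+        termToSubsets.put(term, new HashSet<String>(Arrays.asList(subsets)));
+      }
+    } finally {
+      reader.close();
+    }
+    return termToSubsets;
+  }
+}]]></programlisting>
+			</para>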
+			<para>
+				To create fields according to a cover set definition as described
+				above, the element
+				<emphasis>termSetCoverDefinition</emphasis>
+				is introduced into the
+				<emphasis>field</emphasis>
+				element. An example would look like this:
+				<programlisting><![CDATA[<fields>
+  <field name="articlecategory_" ...>
+    <termSetCoverDefinition coverDefinitionFile="pathToCoverDefinitionFile"
+                 generateFieldNameMethod="append|prepend|replace"
+                 ignoreCaseOfSelectedTerms="true|false" />
+    <annotations>
+      <annotation type="de.julielab.types.ArticleName" />
+    </annotations>
+  </field>
+</fields>]]></programlisting>
+				Here, <emphasis>pathToCoverDefinitionFile</emphasis> points to a
+				file as described above. The
+				<emphasis>generateFieldNameMethod</emphasis> attribute takes one
+				of <emphasis>append</emphasis>, <emphasis>prepend</emphasis> or
+				<emphasis>replace</emphasis>. It defines how the dynamically
+				created category fields are named: the name is derived from the
+				value of the <emphasis>name</emphasis> attribute of the
+				<emphasis>field</emphasis> element by appending or prepending the
+				respective category name, or by replacing the field name with it.
+				If, in the above example, <emphasis>append</emphasis> were used,
+				the resulting field names would be
+				<emphasis>articlecategory_electronics</emphasis>,
+				<emphasis>articlecategory_sanitaryArticles</emphasis> and
+				<emphasis>articlecategory_computers</emphasis>. Each field would
+				only contain the terms defined for it in the
+				<emphasis>termCoverDefinitionFile</emphasis>. The attribute
+				<emphasis>ignoreCaseOfSelectedTerms</emphasis> switches case
+				normalization on or off when checking whether a particular term is
+				allowed for a particular field. When switched off, the term
+				<emphasis>smartphone</emphasis> would be allowed for the fields
+				<emphasis>articlecategory_electronics</emphasis> and
+				<emphasis>articlecategory_computers</emphasis>, while
+				<emphasis>SMARTPHONE</emphasis> would not. Setting the attribute
+				to <emphasis>true</emphasis> leads to the acceptance of both
+				variants into both fields. It is not possible to set this
+				parameter to different values for different cover subset fields of
+				the same cover.
+			</para>
+		</section>
 	</chapter>
 	<chapter id="sandbox.luceneCasConsumer.mapping.reference">
 		<title>Mapping File Reference</title>
@@ -567,6 +709,7 @@
 			</para>
 			<programlisting><![CDATA[<fields>
   <field ..>
+    <termSetCoverDefinition ../>
 	<filters>
 		<filter ../>
 		...
@@ -630,7 +773,7 @@
 								<listitem>
 									<para>
 										contains:
-										<code>filters?, annotations</code>
+										<code>termSetCoverDefinition?, filters?, annotations</code>
 									</para>
 								</listitem>
 							</itemizedlist>
@@ -706,7 +849,8 @@
 												The tokens position increment are adopted in the
 												case
 												of
-												overlapping.</entry>
+												overlapping.
+											</entry>
 										</row>
 										<row>
 											<entry>unique</entry>
@@ -718,7 +862,81 @@
 												resulting Lucene documents. This is required e.g. by Apache
 												Solr for primary key fields. You must not define multiple
 												fields with the same name to be unique, this would break the
-												unique property.</entry>
+												unique property.
+											</entry>
+										</row>
+									</tbody>
+								</tgroup>
+							</table>
+						</para>
+					</listitem>
+					<listitem>
+						<para>
+							<emphasis>termSetCoverDefinition element</emphasis>
+							<itemizedlist>
+								<listitem>
+									<para>element to define the automatic distribution of terms
+										to multiple fields
+									</para>
+								</listitem>
+								<listitem>
+									<para>
+										contains:
+										<code>nothing</code>
+									</para>
+								</listitem>
+							</itemizedlist>
+						</para>
+						<para>
+							<table>
+								<title>termSetCoverDefinition element attributes</title>
+								<tgroup cols="5">
+									<thead>
+										<row>
+											<entry>name</entry>
+											<entry>allowed values</entry>
+											<entry>default value</entry>
+											<entry>mandatory</entry>
+											<entry>description</entry>
+										</row>
+									</thead>
+									<tbody>
+										<row>
+											<entry>coverDefinition
+											File</entry>
+											<entry>string</entry>
+											<entry>-</entry>
+											<entry>yes</entry>
+											<entry>Path to a file defining the term to category
+												assignment (which term belongs to which cover subset).
+											</entry>
+										</row>
+										<row>
+											<entry>generateField
+											NameMethod</entry>
+											<entry>append|prepend|replace</entry>
+											<entry>append</entry>
+											<entry>no</entry>
+											<entry>Determines the names of the cover subset fields. The
+												subset (or category) name is appended or prepended to the
+												original field name, or replaces it completely.
+											</entry>
+										</row>
+										<row>
+											<entry>ignoreCaseOf
+											SelectedTerms</entry>
+											<entry>boolean</entry>
+											<entry>true</entry>
+											<entry>no</entry>
+											<entry>
+												For each subset field, there is a list of allowed term
+												values defined in the
+												<emphasis>coverDefinitionFile</emphasis>
+												. This parameter determines whether the case of term strings
+												is ignored for the membership check.
+											</entry>
 										</row>
 									</tbody>
 								</tgroup>
@@ -731,7 +949,7 @@
 							<itemizedlist>
 								<listitem>
 									<para>
-										container element for filters										
+										container element for filters
 									</para>
 								</listitem>
 								<listitem>
@@ -781,8 +999,8 @@
 											<entry>
 												the name to reference either a predefined filter (see
 												predefined filter reference)
-												or a reused filter 
-							</entry>
+												or a reused filter
+											</entry>
 										</row>
 										<row>
 											<entry>className</entry>
@@ -793,8 +1011,8 @@
 												The canonical class name of a token filter. the token
 												filter class must provide a
 												single argument constructor which
-												takes the token stream as parameter. 
-							</entry>
+												takes the token stream as parameter.
+											</entry>
 										</row>
 										<row>
 											<entry>factoryClassName</entry>
@@ -828,8 +1046,8 @@
 												are referenced by their names, you
 												also
 												need to provide
-												a name.  
-							</entry>
+												a name.
+											</entry>
 										</row>
 										<row>
 											<entry>*</entry>
@@ -856,7 +1074,7 @@
 							<itemizedlist>
 								<listitem>
 									<para>
-										container element for annotations										
+										container element for annotations
 									</para>
 								</listitem>
 								<listitem>
@@ -875,7 +1093,7 @@
 								<listitem>
 									<para>
 										Describes a token stream which is generated from a CAS
-										annotation index.									
+										annotation index.
 									</para>
 								</listitem>
 								<listitem>
@@ -917,7 +1135,7 @@
 											<entry>
 												Determines from which sofa the annotation index is
 												taken
-							</entry>
+											</entry>
 										</row>
 										<row>
 											<entry>featurePath</entry>
@@ -930,7 +1148,7 @@
 												with the annotation object. Features are separated
 												by
 												a ".".
-							</entry>
+											</entry>
 										</row>
 										<row>
 											<entry>tokenizer</entry>
@@ -960,7 +1178,7 @@
 												feature structure are concatenated and delimited
 												by this
 												string.
-							</entry>
+											</entry>
 										</row>
 									</tbody>
 								</tgroup>
@@ -973,7 +1191,7 @@
 							<itemizedlist>
 								<listitem>
 									<para>
-										Container element for features.									
+										Container element for features.
 									</para>
 								</listitem>
 								<listitem>
@@ -1019,7 +1237,7 @@
 											<entry>yes</entry>
 											<entry>
 												The feature name.
-							</entry>
+											</entry>
 										</row>
 										<row>
 											<entry>numberFormat</entry>
@@ -1048,7 +1266,8 @@
 			<para>Lucas comes with a couple of predefined token filters.
 				This
 				section provides a complete
-				reference for these filters.</para>
+				reference for these filters.
+			</para>
 			<section
 				id="sandbox.luceneCasConsumer.mapping.reference.filters.addition">
 				<title>
@@ -1086,7 +1305,7 @@
 									<entry>no</entry>
 									<entry>
 										A post which is added to the end of each token.
-											</entry>
+									</entry>
 								</row>
 							</tbody>
 						</tgroup>
@@ -1100,7 +1319,8 @@
 				</title>
 				<para>Adds hypernyms of a token with the same offset and
 					position
-					increment 0.</para>
+					increment 0.
+				</para>
 				<programlisting><![CDATA[<filter name="hypernyms" filePath="/path/to/myFile.txt"/>]]></programlisting>
 				<para>
 					<table>
@@ -1144,7 +1364,8 @@
 				</title>
 				<para>Allows to select only the first or the last token of a
 					token
-					stream, all other tokens are discarded.</para>
+					stream, all other tokens are discarded.
+				</para>
 				<programlisting><![CDATA[<filter name="position" position="last"/>]]></programlisting>
 				<para>
 					<table>
@@ -1173,7 +1394,7 @@
 										discarded. Otherwise, if position is set to last, only the
 										last
 										token is returned.
-							</entry>
+									</entry>
 								</row>
 							</tbody>
 						</tgroup>
@@ -1214,7 +1435,7 @@
 										following format:
 										<code>
 											TOKEN_TEXT=REPLACEMENT_TEXT
-								</code>
+										</code>
 										.
 									</entry>
 								</row>
@@ -1293,7 +1514,7 @@
 									<entry>yes</entry>
 									<entry>
 										The string on which tokens are split.
-							</entry>
+									</entry>
 								</row>
 							</tbody>
 						</tgroup>
@@ -1369,7 +1590,7 @@
 									<entry>
 										The stopword file path. Each line of the file contains a
 										single stopword.
-							</entry>
+									</entry>
 								</row>
 								<row>
 									<entry>ignoreCase</entry>
@@ -1379,7 +1600,7 @@
 									<entry>
 										Defines if the stop filter ignores the case of stop
 										words.
-							</entry>
+									</entry>
 								</row>
 							</tbody>
 						</tgroup>
@@ -1392,7 +1613,8 @@
 				</title>
 				<para>Filters tokens with the same token text. The resulting
 					token
-					stream contains only tokens with unique texts.</para>
+					stream contains only tokens with unique texts.
+				</para>
 				<programlisting><![CDATA[<filter name="unique"/>]]></programlisting>
 			</section>
 			<section
@@ -1499,27 +1721,36 @@
 	<chapter id="sandbox.luceneCasConsumer.prospectiveSearch">
 		<title>Prospective Search</title>
 		<para>
-			Prospective search is a search method where a set of search queries are given
+			Prospective search is a search method where a set of search
+			queries is given
 			first
-			and then searched against a stream of documents. A search query divides
+			and then searched against a stream of
+			documents. A search query divides
 			the document
-			stream into a sub-stream which only contains these document which match the
+			stream into a sub-stream
+			which only contains those documents which match the
 			query.
-			Users usually define a number of search queries and then subscribe to the
+			Users usually
+			define a number of search queries and then subscribe to the
 			resulting
-			sub-streams. An example for prospective search is a news feed which is monitored
+			sub-streams. An example of prospective search is a news feed which
+			is monitored
 			for certain terms,
-			each time a term occurs a mail notification is send.
+			each time a term occurs, a mail
+			notification is sent.
 		</para>
 		<para>
 			The user must provide a set of search queries via a
 			<code>SearchQueryProvider</code>
 			,
-			these search queries are then search against the processed CAS as defined
+			these search queries are then searched against the processed CAS as
+			defined
 			in the mapping file,
-			if a match occurs a feature structure is inserted into the CAS.
+			if a match occurs, a feature structure is
+			inserted into the CAS.
 			Optionally highlighting is
-			supported, annotations for the matchtng text areas are created and linked to
+			supported;
+			annotations for the matching text areas are created and linked to
 			the
 			feature structure.
 		</para>
@@ -1539,35 +1770,46 @@
 			<para>
 				The Search Query Provider provides the Prospective Search Analysis
 				Engine
-				with a set of search queries which should be monitored. A search
-				query is a combination of a Lucene query and an id. The id is later
+				with a set of search queries which should be monitored. A
+				with a set of search queries which should be monitored. A search
+				query is a combination of a Lucene query and an id. The id is
+				later
-				to map a match to a specific search query. A user usually has a set of
-				search queries which should be monitored, since there is no
+				to map a match to a specific search query. A user usually
+				has a set of
+				search queries which should be monitored. Since there is
+				no
 				standardized
-				way to access the search queries the user must implement the
+				way to access the search queries, the user must
+				implement the
 				<code>SearchQueryProvider</code>
 				interface and configure the thread-safe implementation as shared
 				resource object.
-				An example for such an implementation could be a search query provider
+				An example of such an implementation could be a
+				search query provider
 				which reads
-				search queries form a database or a web service.
+				search queries from a database or a
+				web service.
 			</para>
 		</section>
 		<section id="sandbox.luceneCasConsumer.prospectiveSearch.searchResults">
 			<title>Search Results</title>
 			<para>
-				The search results are written to the CAS, for each match one Search
+				The search results are written to the CAS; for each match, one
+				Search
 				Result
-				feature structure is inserted into the CAS. The Search Result feature
+				feature structure is inserted into the CAS. The Search
+				Result feature
 				structure contains
-				the id and optionally links to an array of annotations which mark the
+				the id and optionally links to an
+				array of annotations which mark the
 				matching
 				text in the CAS.
 			</para>
 			<para>
 				The Search Result type must be mapped to a defined type
-				in the analysis engine descriptor with the following configuration
+				in the
+				analysis engine descriptor with the following configuration
 				parameters:
 				<itemizedlist>
 					<listitem>
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/LuceneDocumentAE.java b/Lucas/src/main/java/org/apache/uima/lucas/LuceneDocumentAE.java
index 9715c70..ee6db4d 100644
--- a/Lucas/src/main/java/org/apache/uima/lucas/LuceneDocumentAE.java
+++ b/Lucas/src/main/java/org/apache/uima/lucas/LuceneDocumentAE.java
@@ -19,6 +19,17 @@
 
 package org.apache.uima.lucas;
 
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
 import org.apache.log4j.Logger;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
@@ -29,31 +40,36 @@
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.lucas.indexer.*;
+import org.apache.uima.lucas.indexer.AnnotationTokenStreamBuilder;
+import org.apache.uima.lucas.indexer.AnnotationTokenStreamBuildingException;
+import org.apache.uima.lucas.indexer.DocumentBuilder;
+import org.apache.uima.lucas.indexer.FieldBuilder;
+import org.apache.uima.lucas.indexer.FieldBuildingException;
+import org.apache.uima.lucas.indexer.FilterBuilder;
+import org.apache.uima.lucas.indexer.FilterBuildingException;
+import org.apache.uima.lucas.indexer.Tokenizer;
 import org.apache.uima.lucas.indexer.analysis.DefaultFilterFactoryRegistry;
 import org.apache.uima.lucas.indexer.analysis.TokenFilterFactory;
-import org.apache.uima.lucas.indexer.mapping.*;
+import org.apache.uima.lucas.indexer.mapping.AnnotationDescription;
+import org.apache.uima.lucas.indexer.mapping.AnnotationMapper;
+import org.apache.uima.lucas.indexer.mapping.ElementMapper;
+import org.apache.uima.lucas.indexer.mapping.FeatureMapper;
+import org.apache.uima.lucas.indexer.mapping.FieldDescription;
+import org.apache.uima.lucas.indexer.mapping.FieldMapper;
+import org.apache.uima.lucas.indexer.mapping.FilterDescription;
+import org.apache.uima.lucas.indexer.mapping.FilterMapper;
+import org.apache.uima.lucas.indexer.mapping.MappingFileReader;
+import org.apache.uima.lucas.indexer.mapping.TermCoverMapper;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.xml.sax.SAXException;
 
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
-
 /**
  * Abstract base class for AEs which need to process Lucene {@link Document}
- * objects. 
+ * objects.
  */
 public abstract class LuceneDocumentAE extends CasAnnotator_ImplBase {
 
-	private static final Logger log = Logger
-			.getLogger(LuceneDocumentAE.class);
+	private static final Logger log = Logger.getLogger(LuceneDocumentAE.class);
 
 	public final static String PARAM_MAPPINGFILE = "mappingFile";
 
@@ -75,9 +91,10 @@
 	 * initializes the analyzer
 	 */
 	@Override
-	public void initialize(UimaContext aContext) throws ResourceInitializationException {
+	public void initialize(UimaContext aContext)
+			throws ResourceInitializationException {
 		super.initialize(aContext);
-		
+
 		createFieldDescriptions();
 		createFilterBuilderWithPreloadedResources();
 
@@ -89,7 +106,8 @@
 
 	private void createFieldDescriptions()
 			throws ResourceInitializationException {
-		String mappingFilePath = (String) getContext().getConfigParameterValue(PARAM_MAPPINGFILE);
+		String mappingFilePath = (String) getContext().getConfigParameterValue(
+				PARAM_MAPPINGFILE);
 
 		try {
 			MappingFileReader indexMappingFileReader = createMappingFileReader();
@@ -98,22 +116,23 @@
 					.readFieldDescriptionsFromFile(mappingFile);
 		} catch (IOException e) {
 			throw new ResourceInitializationException(e);
-		}
-		catch (SAXException e) {
+		} catch (SAXException e) {
 			throw new ResourceInitializationException(e);
 		}
 	}
 
-	private MappingFileReader createMappingFileReader() throws IOException{
+	private MappingFileReader createMappingFileReader() throws IOException {
 		try {
 			SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
 			Map<String, ElementMapper<?>> elementMappers = new HashMap<String, ElementMapper<?>>();
-			elementMappers.put(MappingFileReader.ANNOTATION, new AnnotationMapper());
+			elementMappers.put(MappingFileReader.ANNOTATION,
+					new AnnotationMapper());
 			elementMappers.put(MappingFileReader.FILTER, new FilterMapper());
+			elementMappers.put(MappingFileReader.TERM_SET_COVER_DEFINITION,
+					new TermCoverMapper());
 			elementMappers.put(MappingFileReader.FIELD, new FieldMapper());
-			elementMappers.put(MappingFileReader.FEATURE, new FeatureMapper());			
+			elementMappers.put(MappingFileReader.FEATURE, new FeatureMapper());
 			return new MappingFileReader(parser, elementMappers);
-			
+
 		} catch (ParserConfigurationException e) {
 			throw new IOException("Can't build SAXParser: " + e.getMessage());
 		} catch (SAXException e) {
@@ -133,7 +152,8 @@
 		filterBuilder = new FilterBuilder(defaultFilterFactoryRegistry);
 	}
 
-	protected void preloadResources(Collection<FieldDescription> fieldDescriptions,
+	protected void preloadResources(
+			Collection<FieldDescription> fieldDescriptions,
 			Map<String, TokenFilterFactory> defaultFilterFactoryRegistry)
 			throws IOException {
 
@@ -161,7 +181,8 @@
 		}
 	}
 
-	public Document createDocument(CAS cas) throws AnalysisEngineProcessException {
+	public Document createDocument(CAS cas)
+			throws AnalysisEngineProcessException {
 
 		try {
 			JCas jCas = cas.getJCas();
@@ -194,9 +215,9 @@
 			}
 			// create document and add to index
 			Document document = documentBuilder.createDocument(fields);
-			
+
 			return document;
-			
+
 		} catch (AnnotationTokenStreamBuildingException e) {
 			log.error("processCas(CAS)", e);
 			throw new AnalysisEngineProcessException(e);
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/FieldBuilder.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/FieldBuilder.java
index 3b200f3..77b93d8 100644
--- a/Lucas/src/main/java/org/apache/uima/lucas/indexer/FieldBuilder.java
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/FieldBuilder.java
@@ -19,6 +19,10 @@
 
 package org.apache.uima.lucas.indexer;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Field;
@@ -28,13 +32,11 @@
 import org.apache.uima.lucas.indexer.analysis.TokenStreamConcatenator;
 import org.apache.uima.lucas.indexer.analysis.TokenStreamMerger;
 import org.apache.uima.lucas.indexer.mapping.FieldDescription;
+import org.apache.uima.lucas.indexer.mapping.TermCoverBuilder;
+import org.apache.uima.lucas.indexer.mapping.TermCoverBuilderFactory;
 import org.apache.uima.lucas.indexer.mapping.FilterDescription;
 import org.apache.uima.lucas.indexer.util.TokenStreamStringConcatenator;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-
 public class FieldBuilder {
 
 	public static final String FIELD_INDEX_NO = "no";
@@ -67,8 +69,11 @@
 	private FilterBuilder filterBuilder;
 	protected FieldDescription fieldDescription;
 
+	private TermCoverBuilderFactory termCoverBuilderFactory;
+
 	public FieldBuilder(FilterBuilder filterBuilder) {
 		tokenStreamStringConcatenator = new TokenStreamStringConcatenator();
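+		// creates TermCoverBuilders for fields that carry a
+		// termSetCoverDefinition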
+		termCoverBuilderFactory = new TermCoverBuilderFactory();
 		this.filterBuilder = filterBuilder;
 	}
 
@@ -91,30 +96,53 @@
 		// The unique flag means we only want ONE field instance with the
 		// name fieldName.
 		Boolean unique = fieldDescription.getUnique();
+		boolean coverField = fieldDescription.getTermCoverDescription() != null;
 		Field.Store fieldStore = getFieldStore(fieldDescription.getStored());
 		Field.Index fieldIndex = getFieldIndex(fieldDescription.getIndex());
 		Field.TermVector fieldTermVector = getFieldTermVector(fieldDescription
 				.getTermVector());
 		boolean omitTF = fieldDescription.getIndex().equals(FIELD_INDEX_NO_TF)
 				|| fieldDescription.getIndex().equals(FIELD_INDEX_NO_NORMS_TF);
-		boolean store = fieldStore == Field.Store.YES || fieldStore == Field.Store.COMPRESS;
+		boolean store = fieldStore == Field.Store.YES
+				|| fieldStore == Field.Store.COMPRESS;
 
-		// Created stored fields. The parameters unique, fieldIndex and omitTF
-		// are only necessary in case of a stored and indexed unique field. Then,
-		// the field is instanced stored and indexed, thus only one instance
-		// of the field is necessary. This only works with TokenStreams which
-		// contain exactly one token (if a TokenStream emits more tokens,
-		// several fields will be instanced).
-		if (store)
-			fields.addAll(createStoredFields(fieldName, tokenStream,
-					fieldStore, delimiter, unique, fieldIndex, omitTF));
+		if (!coverField) {
+			// Create stored fields. The parameters unique, fieldIndex and
+			// omitTF are only necessary in case of a stored and indexed
+			// unique field. Then, the field is instanced stored and indexed,
+			// thus only one instance of the field is necessary. This only works
+			// with TokenStreams which contain exactly one token (if a
+			// TokenStream emits more tokens, several fields will be instanced).
+			if (store)
+				fields.addAll(createStoredFields(fieldName, tokenStream,
+						fieldStore, delimiter, unique, fieldIndex, omitTF));
 
-		// Create indexed fields. If the field is unique and has been stored,
-		// there already is an instance of the field and we don't create another.
-		if (fieldIndex != Field.Index.NO && (!unique || !store))
-			fields.add(createIndexedField(fieldName, tokenStream, fieldIndex,
-					fieldTermVector, omitTF));
+			// Create indexed fields. If the field is unique and has been
+			// stored, there already is an instance of the field and we don't
+			// create another.
+			if (fieldIndex != Field.Index.NO && (!unique || !store))
+				fields.add(createIndexedField(fieldName, tokenStream,
+						fieldIndex, fieldTermVector, omitTF));
 
+		} else {
+
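+			// A term set cover is defined for this field: the terms of the
+			// token stream are distributed over dynamically named cover
+			// subset fields. For each cover subset, the same store/index
+			// logic as in the plain-field case above is applied to the
+			// subset's token stream.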
+			TermCoverBuilder termCoverBuilder = termCoverBuilderFactory
+					.createTermCoverBuilder(tokenStream,
+							fieldDescription.getTermCoverDescription());
+			while (termCoverBuilder.increaseCoverSubset()) {
+				String coverSubsetName = termCoverBuilder.getCoverSubsetName();
+				TokenStream coverSubsetTokenStream = termCoverBuilder
+						.getPartitionTokenStream();
+				if (store)
+					fields.addAll(createStoredFields(coverSubsetName,
+							coverSubsetTokenStream, fieldStore, delimiter,
+							unique, fieldIndex, omitTF));
+
+				if (fieldIndex != Field.Index.NO && (!unique || !store))
+					fields.add(createIndexedField(coverSubsetName,
+							coverSubsetTokenStream, fieldIndex, fieldTermVector,
+							omitTF));
+			}
+		}
 		return fields;
 	}
 
@@ -158,8 +186,9 @@
 
 	}
 
-	protected Field createIndexedField(String fieldName, TokenStream tokenStream,
-			Index fieldIndex, TermVector fieldTermVector, boolean omitTF) {
+	protected Field createIndexedField(String fieldName,
+			TokenStream tokenStream, Index fieldIndex,
+			TermVector fieldTermVector, boolean omitTF) {
 
 		Field field = new Field(fieldName, tokenStream, fieldTermVector);
 		if (fieldIndex == Field.Index.NOT_ANALYZED_NO_NORMS)
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/AnnotationTokenStream.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/AnnotationTokenStream.java
index c7055f0..d4e3611 100644
--- a/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/AnnotationTokenStream.java
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/AnnotationTokenStream.java
@@ -48,9 +48,10 @@
 
 /**
  * 
- * AnnotationTokenStream represents a TokenStream which extracts tokens from feature values of
- * annotations of a given type from a JCas object. Each token has the start and end offset from the
- * annotation object. This class supports only the following UIMA JCas types of features:
+ * AnnotationTokenStream represents a TokenStream which extracts tokens from
+ * feature values of annotations of a given type from a JCas object. Each token
+ * has the start and end offset from the annotation object. This class supports
+ * only the following UIMA JCas types of features:
  * <ol>
  * <li>String</li>
  * <li>StringArray</li>
@@ -60,445 +61,511 @@
  */
 public class AnnotationTokenStream extends TokenStream {
 
-  private JCas jCas;
+	private JCas jCas;
 
-  private String featurePath;
+	private String featurePath;
 
-  private List<String> featureNames;
+	private List<String> featureNames;
 
-  private String delimiter;
+	private String delimiter;
 
-  private Iterator<Annotation> annotationIterator; // iterates over annotations
+	// iterates over annotations
+	private Iterator<Annotation> annotationIterator;
 
-  private Iterator<FeatureStructure> featureStructureIterator; // iterates over feature structures
-                                                               // stored in feature arrays of an
-                                                               // annotation
+	// iterates over feature structures stored in feature arrays of an
+	// annotation
+	private Iterator<FeatureStructure> featureStructureIterator;
 
-  private Iterator<String> featureValueIterator; // iterates over the features of a feature
-                                                 // structure
+	// iterates over the features of a feature structure
+	private Iterator<String> featureValueIterator;
 
-  private Annotation currentAnnotation;
+	private Annotation currentAnnotation;
 
-  private Type annotationType;
+	private Type annotationType;
 
-  private Map<String, Format> featureFormats; // a optional map of format object for each feature
+	// an optional map with a format object for each feature
+	private Map<String, Format> featureFormats;
 
-  private static Logger logger = Logger.getLogger(AnnotationTokenStream.class);
+	private static Logger logger = Logger
+			.getLogger(AnnotationTokenStream.class);
 
-  private class NotNullPredicate<T> implements Predicate<T> {
+	private class NotNullPredicate<T> implements Predicate<T> {
 
-    public boolean apply(T object) {
-      return object != null;
-    }
-  }
+		public boolean apply(T object) {
+			return object != null;
+		}
+	}
 
-  /**
-   * Creates a TokenStream which extracts all coveredText feature values of annotations of a given
-   * type from a JCas object. Each token has the start and end offset of the annotation and takes
-   * the covered text value as termText.
-   * 
-   * @param jCas
-   *          the jCas
-   * @param sofaName the name of the subject of analysis (sofa)
-   * @param typeName
-   *          the type of the annotation
-   * @throws CASException
-   */
-  public AnnotationTokenStream(JCas jCas, String sofaName, String typeName) throws InvalidTokenSourceException {
-    this(jCas, sofaName, typeName, null, Collections.<String>emptyList(), null, 
-         Collections.<String, Format>emptyMap());
-  }
+	/**
+	 * Creates a TokenStream which extracts all coveredText feature values of
+	 * annotations of a given type from a JCas object. Each token has the start
+	 * and end offset of the annotation and takes the covered text value as
+	 * termText.
+	 * 
+	 * @param jCas
+	 *            the jCas
+	 * @param sofaName
+	 *            the name of the subject of analysis (sofa)
+	 * @param typeName
+	 *            the type of the annotation
+	 * @throws InvalidTokenSourceException
+	 */
+	public AnnotationTokenStream(JCas jCas, String sofaName, String typeName)
+			throws InvalidTokenSourceException {
+		this(jCas, sofaName, typeName, null, Collections.<String> emptyList(),
+				null, Collections.<String, Format> emptyMap());
+	}
 
-  /**
-   * Creates a TokenStream which extracts all feature values of a given feature name from
-   * annotations with a given type from a given JCas object. Each token has the start and end offset
-   * of the annotation and uses the feature value as term text.
-   * 
-   * @param jCas
-   *          the JCas object
-   * @param sofaName the name of the subject of analysis (sofa)
-   * @param typeName
-   *          the type of the annotation
-   * @param featureName
-   *          the name of the feature from which the token text is build
-   * @param featureFormat
-   *          optional format object to convert feature values to strings
-   * @throws InvalidTokenSourceException
-   */
+	/**
+	 * Creates a TokenStream which extracts all feature values of a given
+	 * feature name from annotations with a given type from a given JCas object.
+	 * Each token has the start and end offset of the annotation and uses the
+	 * feature value as term text.
+	 * 
+	 * @param jCas
+	 *            the JCas object
+	 * @param sofaName
+	 *            the name of the subject of analysis (sofa)
+	 * @param typeName
+	 *            the type of the annotation
+	 * @param featureName
+	 *            the name of the feature from which the token text is built
+	 * @param featureFormat
+	 *            optional format object to convert feature values to strings
+	 * @throws InvalidTokenSourceException
+	 */
 
-  public AnnotationTokenStream(JCas jCas, String sofaName, String typeName, String featureName,
-          Format featureFormat) throws InvalidTokenSourceException {
-    this(jCas, sofaName, typeName, null, Lists.newArrayList(featureName), null, 
-         featureFormat != null ? ImmutableBiMap.of(featureName, featureFormat): Collections.<String, Format>emptyMap());
-  }
+	public AnnotationTokenStream(JCas jCas, String sofaName, String typeName,
+			String featureName, Format featureFormat)
+			throws InvalidTokenSourceException {
+		this(jCas, sofaName, typeName, null, Lists.newArrayList(featureName),
+				null, featureFormat != null ? ImmutableBiMap.of(featureName,
+						featureFormat) : Collections
+						.<String, Format> emptyMap());
+	}
 
-  /**
-   * Creates a TokenStream which extracts all feature values of a given feature name list from
-   * annotations with a given type from a given JCas object. Each token has the start and end offset
-   * of the annotation and uses the concatenation of all the feature values as term text. Optionally
-   * the different feature values of an annotation can be concatenated with a delimiter.
-   * 
-   * @param jCas
-   *          the JCas object
-   * @param sofaName the name of the Subject Of Analysis (sofa)
-   * @param typeName
-   *          the type of the annotation
-   * @param featureNames
-   *          the name of the feature from which the token text is build
-   * @param delimiter
-   *          a delimiter for concatenating the different feature values of an annotation object. If
-   *          null a white space will be used.
-   * @param featureFormats
-   *          optional map of format objects to convert feature values to strings - the key must be
-   *          the feature name
-   * @throws InvalidTokenSourceException
-   */
-  public AnnotationTokenStream(JCas jCas, String sofaName, String typeName,
-          List<String> featureNames, String delimiter, Map<String, Format> featureFormats)
-          throws InvalidTokenSourceException {
-    this(jCas, sofaName, typeName, null, featureNames, delimiter, featureFormats);
-  }
+	/**
+	 * Creates a TokenStream which extracts all feature values of a given
+	 * feature name list from annotations with a given type from a given JCas
+	 * object. Each token has the start and end offset of the annotation and
+	 * uses the concatenation of all the feature values as term text. Optionally
+	 * the different feature values of an annotation can be concatenated with a
+	 * delimiter.
+	 * 
+	 * @param jCas
+	 *            the JCas object
+	 * @param sofaName
+	 *            the name of the Subject Of Analysis (sofa)
+	 * @param typeName
+	 *            the type of the annotation
+	 * @param featureNames
+	 *            the names of the features from which the token text is built
+	 * @param delimiter
+	 *            a delimiter for concatenating the different feature values of
+	 *            an annotation object. If null, a white space will be used.
+	 * @param featureFormats
+	 *            optional map of format objects to convert feature values to
+	 *            strings - the key must be the feature name
+	 * @throws InvalidTokenSourceException
+	 */
+	public AnnotationTokenStream(JCas jCas, String sofaName, String typeName,
+			List<String> featureNames, String delimiter,
+			Map<String, Format> featureFormats)
+			throws InvalidTokenSourceException {
+		this(jCas, sofaName, typeName, null, featureNames, delimiter,
+				featureFormats);
+	}
 
-  /**
-   * Creates a TokenStream which extracts all feature values of a given feature name list from
-   * annotations with a given type from a given JCas object. Each token has the start and end offset
-   * of the annotation and uses the concatenation of all the feature values as term text.
-   * 
-   * @param jCas
-   *          the JCas object
-   * @param sofaName the name of the Subject Of Analysis (sofa)
-   * @param typeName
-   *          the type of the annotation
-   * @param featureNames
-   *          the name of the feature from which the token text is build
-   * @param featureFormats
-   *          optional map of format objects to convert feature values to strings - the key must be
-   *          the feature name
-   * @throws InvalidTokenSourceException
-   */
-  public AnnotationTokenStream(JCas jCas, String sofaName, String typeName,
-          List<String> featureNames, Map<String, Format> featureFormats) throws InvalidTokenSourceException {
-    this(jCas, sofaName, typeName, null, featureNames, null, featureFormats);
-  }
+	/**
+	 * Creates a TokenStream which extracts all feature values of a given
+	 * feature name list from annotations with a given type from a given JCas
+	 * object. Each token has the start and end offset of the annotation and
+	 * uses the concatenation of all the feature values as term text.
+	 * 
+	 * @param jCas
+	 *            the JCas object
+	 * @param sofaName
+	 *            the name of the Subject Of Analysis (sofa)
+	 * @param typeName
+	 *            the type of the annotation
+	 * @param featureNames
+	 *            the names of the features from which the token text is built
+	 * @param featureFormats
+	 *            optional map of format objects to convert feature values to
+	 *            strings - the key must be the feature name
+	 * @throws InvalidTokenSourceException
+	 */
+	public AnnotationTokenStream(JCas jCas, String sofaName, String typeName,
+			List<String> featureNames, Map<String, Format> featureFormats)
+			throws InvalidTokenSourceException {
+		this(jCas, sofaName, typeName, null, featureNames, null, featureFormats);
+	}
 
-  /**
-   * Creates a TokenStream which extracts all feature values of a given feature name list from
-   * annotations with a given type from a given JCas object. The addressed features are part of
-   * direct or indirect feature structure value of a annotation. For example a annotation of type
-   * person has a feature address which values are address feature structures with features for the
-   * street, postal code and city . To create tokens with postal code and city of a persons address,
-   * the featurePath must be &quot;address&quot; and the featureNames &quot;postalCode&quot; and
-   * &quot;city&quot;. Each token has the start and end offset of the annotation and uses the
-   * concatenation of all the feature values as term text.
-   * 
-   * @param jCas
-   *          the JCas object
-   * @param sofaName the name of the Subject of Analysis (sofa)
-   * @param typeName
-   *          the type of the annotation
-   * @param featurePath
-   *          the path to the feature structures which features should be used for tokens Path
-   *          entries should be separated by &quot;.&quot;. Example:
-   *          &quot;affiliation.address.country&quot;
-   * @param featureNames
-   *          the name of the feature from which the token text is build
-   * @param featureFormats
-   *          optional map of format objects to convert feature values to strings - the key must be
-   *          the feature name
-   * @throws InvalidTokenSourceException
-   */
-  public AnnotationTokenStream(JCas jCas, String sofaName, String typeName, String featurePath,
-          List<String> featureNames, Map<String, Format> featureFormats) throws InvalidTokenSourceException {
-    this(jCas, sofaName, typeName, featurePath, featureNames, null, featureFormats);
-  }
+	/**
+	 * Creates a TokenStream which extracts all feature values of a given
+	 * feature name list from annotations with a given type from a given JCas
+	 * object. The addressed features are part of a direct or indirect feature
+	 * structure value of an annotation. For example, an annotation of type
+	 * person may have a feature address whose values are address feature
+	 * structures with features for the street, postal code and city. To
+	 * create tokens with postal code and city of a person's address, the
+	 * featurePath must be
+	 * &quot;address&quot; and the featureNames &quot;postalCode&quot; and
+	 * &quot;city&quot;. Each token has the start and end offset of the
+	 * annotation and uses the concatenation of all the feature values as term
+	 * text.
+	 * 
+	 * @param jCas
+	 *            the JCas object
+	 * @param sofaName
+	 *            the name of the Subject of Analysis (sofa)
+	 * @param typeName
+	 *            the type of the annotation
+	 * @param featurePath
+	 *            the path to the feature structures whose features should be
+	 *            used for tokens. Path entries should be separated by
+	 *            &quot;.&quot;. Example:
+	 *            &quot;affiliation.address.country&quot;
+	 * @param featureNames
+	 *            the names of the features from which the token text is built
+	 * @param featureFormats
+	 *            optional map of format objects to convert feature values to
+	 *            strings - the key must be the feature name
+	 * @throws InvalidTokenSourceException
+	 *             if the sofa, the annotation type or one of the features
+	 *             cannot be resolved
+	 */
+	public AnnotationTokenStream(JCas jCas, String sofaName, String typeName,
+			String featurePath, List<String> featureNames,
+			Map<String, Format> featureFormats)
+			throws InvalidTokenSourceException {
+		this(jCas, sofaName, typeName, featurePath, featureNames, null,
+				featureFormats);
+	}
 
-  /**
-   * Creates a TokenStream which extracts all feature values of a given feature name list from
-   * annotations with a given type from a given JCas object. The addressed features are part of
-   * direct or indirect feature structure value of a annotation. For example a annotation of type
-   * person has a feature address which values are address feature structures with features for the
-   * street, postal code and city . To create tokens with postal code and city of a persons address,
-   * the featurePath must be &quot;address&quot; and the featureNames &quot;postalCode&quot; and
-   * &quot;city&quot;. Each token has the start and end offset of the annotation and uses the
-   * concatenation of all the feature values as term text. Optionally the different feature values
-   * of an annotation can be concatenated with a delimiter.
-   * 
-   * @param jCas
-   *          the JCas object
-   * @param sofaName the name of the Subject of Analysis (sofa)
-   * @param typeName
-   *          the type of the annotation
-   * @param featurePath
-   *          the path to the feature structures which features should be used for tokens Path
-   *          entries should be separated by &quot;.&quot;. Example:
-   *          &quot;affiliation.address.country&quot;
-   * @param featureNames
-   *          the name of the feature from which the token text is build
-   * @param delimiter
-   *          a delimiter for concatenating the different feature values of an annotation object. If
-   *          null a white space will be used.
-   * @param featureFormats
-   *          optional map of format objects to convert feature values to strings - the key must be
-   *          the feature name
-   * @throws InvalidTokenSourceException
-   */
-  public AnnotationTokenStream(JCas jCas, String sofaName, String typeName, String featurePath,
-          List<String> featureNames, String delimiter, Map<String, Format> featureFormats)
-          throws InvalidTokenSourceException {
-    super();
+	/**
+	 * Creates a TokenStream which extracts all feature values of a given
+	 * feature name list from annotations with a given type from a given JCas
+	 * object. The addressed features are part of a direct or indirect feature
+	 * structure value of an annotation. For example, an annotation of type
+	 * person may have a feature address whose values are address feature
+	 * structures with features for the street, postal code and city. To
+	 * create tokens with postal code and city of a person's address, the
+	 * featurePath must be
+	 * &quot;address&quot; and the featureNames &quot;postalCode&quot; and
+	 * &quot;city&quot;. Each token has the start and end offset of the
+	 * annotation and uses the concatenation of all the feature values as term
+	 * text. Optionally the different feature values of an annotation can be
+	 * concatenated with a delimiter.
+	 * 
+	 * @param jCas
+	 *            the JCas object
+	 * @param sofaName
+	 *            the name of the Subject of Analysis (sofa)
+	 * @param typeName
+	 *            the type of the annotation
+	 * @param featurePath
+	 *            the path to the feature structures whose features should be
+	 *            used for tokens. Path entries should be separated by
+	 *            &quot;.&quot;. Example:
+	 *            &quot;affiliation.address.country&quot;
+	 * @param featureNames
+	 *            the names of the features from which the token text is built
+	 * @param delimiter
+	 *            a delimiter for concatenating the different feature values of
+	 *            an annotation object. If null, a white space is used.
+	 * @param featureFormats
+	 *            optional map of format objects to convert feature values to
+	 *            strings - the key must be the feature name
+	 * @throws InvalidTokenSourceException
+	 *             if the sofa, the annotation type or one of the features
+	 *             cannot be resolved
+	 */
+	public AnnotationTokenStream(JCas jCas, String sofaName, String typeName,
+			String featurePath, List<String> featureNames, String delimiter,
+			Map<String, Format> featureFormats)
+			throws InvalidTokenSourceException {
+		super();
 
-    this.featurePath = featurePath;
-    this.featureNames = featureNames;
-    this.delimiter = delimiter;
-    if (featureFormats == null)
-      this.featureFormats = Collections.emptyMap();
-    else
-      this.featureFormats = featureFormats;
-    
-    getSofaCas(jCas, sofaName);
-    getTypeForName(typeName);
-    validate(annotationType, featureNames, featurePath);
-    
-    initializeIterators();
+		this.featurePath = featurePath;
+		this.featureNames = featureNames;
+		this.delimiter = delimiter;
+		if (featureFormats == null)
+			this.featureFormats = Collections.emptyMap();
+		else
+			this.featureFormats = featureFormats;
 
-  }
+		getSofaCas(jCas, sofaName);
+		getTypeForName(typeName);
+		validate(annotationType, featureNames, featurePath);
 
-  private void getTypeForName(String typeName) throws InvalidTokenSourceException{
-    annotationType = jCas.getTypeSystem().getType(typeName);
-    if( annotationType == null )
-      throw new InvalidTokenSourceException("Type " + typeName + " not found!");
-  }
+		initializeIterators();
 
-  private void getSofaCas(JCas cas, String sofaName) throws InvalidTokenSourceException {
-    try {
-      jCas = cas.getView(sofaName);
-    } catch (CASException e) {
-      throw new InvalidTokenSourceException(e);
-    }
-  }
+	}
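+
+	// Hedged usage sketch (not part of the original sources; type and feature
+	// names are assumptions): extracting postal code and city from a feature
+	// path below a hypothetical Person annotation type:
+	//
+	//   TokenStream stream = new AnnotationTokenStream(jCas,
+	//       CAS.NAME_DEFAULT_SOFA, "de.example.Person", "address",
+	//       Arrays.asList("postalCode", "city"), ", ", null);
+	//
+	// Each Person annotation then yields tokens carrying the annotation's
+	// offsets and the concatenated feature values as term text.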
 
-  void validate(Type type, Collection<String> featureNames, String featurePath) throws InvalidTokenSourceException{
-    Type typeToValidate = findTypeWithPath(type, featurePath);
-    
-    for( String featureName: featureNames ){
-      Feature feature = typeToValidate.getFeatureByBaseName(featureName);
-      if( feature == null )
-        throw new InvalidTokenSourceException("Type " + typeToValidate.getName() + " has no feature " + featureName + ". featurePath: " + featurePath);
-    }
-  }
-  
-  private Type findTypeWithPath(Type type, String featurePath) throws InvalidTokenSourceException{
-    if( featurePath == null )
-      return type;
-    
-    String[] featurePathElements = featurePath.split("\\.");
-    Type currentType = type;
-    
-    for( String featurePathElement: featurePathElements ){
-      Feature feature = currentType.getFeatureByBaseName(featurePathElement);
-      if (feature == null)
-        throw new InvalidTokenSourceException("Type " + currentType.getName() + " has no feature " + featurePathElement);
-      
-      currentType = feature.getRange();
-      if (currentType.isArray())
-        currentType = currentType.getComponentType();
-    }
-    
-    return currentType;
-  }
+	private void getTypeForName(String typeName)
+			throws InvalidTokenSourceException {
+		annotationType = jCas.getTypeSystem().getType(typeName);
+		if (annotationType == null)
+			throw new InvalidTokenSourceException("Type " + typeName
+					+ " not found!");
+	}
 
-  @Override
-  public Token next(Token token) throws IOException {
-    while (!featureValueIterator.hasNext()) {
-      while (!featureStructureIterator.hasNext()) {
-        if (!annotationIterator.hasNext())
-          return null;
-        currentAnnotation = (Annotation) annotationIterator.next();
-        featureStructureIterator = createFeatureStructureIterator(currentAnnotation, featurePath);
-      }
+	private void getSofaCas(JCas cas, String sofaName)
+			throws InvalidTokenSourceException {
+		try {
+			jCas = cas.getView(sofaName);
+		} catch (CASException e) {
+			throw new InvalidTokenSourceException(e);
+		}
+	}
 
-      featureValueIterator = createFeatureValueIterator(featureStructureIterator.next(),
-              featureNames);
-    }
+	void validate(Type type, Collection<String> featureNames, String featurePath)
+			throws InvalidTokenSourceException {
+		Type typeToValidate = findTypeWithPath(type, featurePath);
 
-    token.setStartOffset(currentAnnotation.getBegin());
-    token.setEndOffset(currentAnnotation.getEnd());
+		for (String featureName : featureNames) {
+			Feature feature = typeToValidate.getFeatureByBaseName(featureName);
+			if (feature == null)
+				throw new InvalidTokenSourceException("Type "
+						+ typeToValidate.getName() + " has no feature "
+						+ featureName + ". featurePath: " + featurePath);
+		}
+	}
 
-    char[] value = featureValueIterator.next().toCharArray();
-    token.setTermBuffer(value, 0, value.length);
-    return token;
-  }
+	private Type findTypeWithPath(Type type, String featurePath)
+			throws InvalidTokenSourceException {
+		if (featurePath == null)
+			return type;
 
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.lucene.analysis.TokenStream#next()
-   */
-  @Override
-  public Token next() throws IOException {
-    return next(new Token());
-  }
+		String[] featurePathElements = featurePath.split("\\.");
+		Type currentType = type;
 
-  protected void initializeIterators() {
-    annotationIterator = Iterators.filter(jCas.getAnnotationIndex(annotationType).iterator(),
-            new NotNullPredicate<Annotation>());
+		for (String featurePathElement : featurePathElements) {
+			Feature feature = currentType
+					.getFeatureByBaseName(featurePathElement);
+			if (feature == null)
+				throw new InvalidTokenSourceException("Type "
+						+ currentType.getName() + " has no feature "
+						+ featurePathElement);
 
-    if (!annotationIterator.hasNext()) {
-      featureStructureIterator = Iterators.emptyIterator();
-      featureValueIterator = Iterators.emptyIterator();
-      return;
-    }
+			currentType = feature.getRange();
+			if (currentType.isArray())
+				currentType = currentType.getComponentType();
+		}
 
-    currentAnnotation = (Annotation) annotationIterator.next();
-    featureStructureIterator = createFeatureStructureIterator(currentAnnotation, featurePath);
-    if (!featureStructureIterator.hasNext()) {
-      featureValueIterator = Iterators.emptyIterator();
-      return;
-    }
+		return currentType;
+	}
 
-    FeatureStructure featureStructure = featureStructureIterator.next();
-    featureValueIterator = createFeatureValueIterator(featureStructure, featureNames);
-  }
+	@Override
+	public Token next(Token token) throws IOException {
+		while (!featureValueIterator.hasNext()) {
+			while (!featureStructureIterator.hasNext()) {
+				if (!annotationIterator.hasNext())
+					return null;
+				currentAnnotation = (Annotation) annotationIterator.next();
+				featureStructureIterator = createFeatureStructureIterator(
+						currentAnnotation, featurePath);
+			}
 
-  protected Iterator<FeatureStructure> createFeatureStructureIterator(Annotation annotation,
-          String featurePath) {
-    Collection<FeatureStructure> featureStructures = new LinkedList<FeatureStructure>();
-    Collection<FeatureStructure> childs = new LinkedList<FeatureStructure>();
+			featureValueIterator = createFeatureValueIterator(
+					featureStructureIterator.next(), featureNames);
+		}
 
-    if (featurePath == null) {
-      featureStructures.add(annotation);
-      return featureStructures.iterator();
-    }
+		// Reset the token to its default values. Without this we run into
+		// problems e.g. with the HypernymFilter: tokens are re-used by Lucene
+		// 2.9.3, and once the positionIncrement has been set to 0 it stays
+		// that way until it is explicitly set to another value.
+		token.reinit(new Token());
 
-    Type currentType = annotation.getType();
-    if (currentType.isArray())
-      currentType = currentType.getComponentType();
+		token.setStartOffset(currentAnnotation.getBegin());
+		token.setEndOffset(currentAnnotation.getEnd());
 
-    String[] pathEntries = featurePath.split("\\.");
-    featureStructures.add(annotation);
+		char[] value = featureValueIterator.next().toCharArray();
+		token.setTermBuffer(value, 0, value.length);
+		return token;
+	}
 
-    for (String pathEntry : pathEntries) {
-      Feature feature = currentType.getFeatureByBaseName(pathEntry);
-      childs.clear();
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.apache.lucene.analysis.TokenStream#next()
+	 */
+	@Override
+	public Token next() throws IOException {
+		return next(new Token());
+	}
 
-      if (feature.getRange().isArray()) {
-        for (FeatureStructure featureStructureItem : featureStructures) {
-          FSArray fsArray = (FSArray) featureStructureItem.getFeatureValue(feature);
-          if (fsArray == null)
-            continue;
+	protected void initializeIterators() {
+		annotationIterator = Iterators.filter(
+				jCas.getAnnotationIndex(annotationType).iterator(),
+				new NotNullPredicate<Annotation>());
 
-          for (int i = 0; i < fsArray.size(); i++)
-            childs.add(fsArray.get(i));
-        }
-      } else
-        for (FeatureStructure featureStructureItem : featureStructures)
-          childs.add(featureStructureItem.getFeatureValue(feature));
+		if (!annotationIterator.hasNext()) {
+			featureStructureIterator = Iterators.emptyIterator();
+			featureValueIterator = Iterators.emptyIterator();
+			return;
+		}
 
-      currentType = feature.getRange();
-      if (currentType.isArray())
-        currentType = currentType.getComponentType();
+		currentAnnotation = (Annotation) annotationIterator.next();
+		featureStructureIterator = createFeatureStructureIterator(
+				currentAnnotation, featurePath);
+		if (!featureStructureIterator.hasNext()) {
+			featureValueIterator = Iterators.emptyIterator();
+			return;
+		}
 
-      featureStructures.clear();
-      featureStructures.addAll(childs);
-    }
+		FeatureStructure featureStructure = featureStructureIterator.next();
+		featureValueIterator = createFeatureValueIterator(featureStructure,
+				featureNames);
+	}
 
-    return Iterators.filter(featureStructures.iterator(), new NotNullPredicate<FeatureStructure>());
-  }
+	protected Iterator<FeatureStructure> createFeatureStructureIterator(
+			Annotation annotation, String featurePath) {
+		Collection<FeatureStructure> featureStructures = new LinkedList<FeatureStructure>();
+		Collection<FeatureStructure> children = new LinkedList<FeatureStructure>();
 
-  protected Iterator<String> createFeatureValueIterator(FeatureStructure srcFeatureStructure,
-          Collection<String> featureNames) {
-    List<String> values = new LinkedList<String>();
-    Type featureType = srcFeatureStructure.getType();
+		if (featurePath == null) {
+			featureStructures.add(annotation);
+			return featureStructures.iterator();
+		}
 
-    if (featureNames.size() == 0)
-      values.add(currentAnnotation.getCoveredText());
+		Type currentType = annotation.getType();
+		if (currentType.isArray())
+			currentType = currentType.getComponentType();
 
-    for (String featureName : featureNames) {
-      Feature feature = featureType.getFeatureByBaseName(featureName);
-      if (feature.getRange().isArray()) {
-        StringArray fsArray = (StringArray) srcFeatureStructure.getFeatureValue(feature);
-        if (featureNames.size() == 1) {
-          for (int i = 0; i < fsArray.size(); i++)
-            values.add(fsArray.get(i).toString());
-        } else {
-          String value = "";
-          for (int i = 0; i < fsArray.size(); i++) {
-            value = value.concat(fsArray.get(i).toString());
-            if (i < fsArray.size() - 1)
-              value = value.concat(delimiter);
-          }
-          values.add(value);
-        }
-      } else
-        values.add(getValueForFeature(srcFeatureStructure, feature, featureFormats.get(feature
-                .getShortName())));
-    }
-    String value = "";
-    if (delimiter != null) {
-      for (int i = 0; i < values.size(); i++) {
-        if (values.get(i) == null)
-          continue;
+		String[] pathEntries = featurePath.split("\\.");
+		featureStructures.add(annotation);
 
-        value = value.concat(values.get(i));
-        if (i < values.size() - 1)
-          value = value.concat(delimiter);
-      }
-      values.clear();
-      values.add(value);
-    }
+		for (String pathEntry : pathEntries) {
+			Feature feature = currentType.getFeatureByBaseName(pathEntry);
+			children.clear();
 
-    return Iterators.filter(values.iterator(), new NotNullPredicate<String>());
-  }
+			if (feature.getRange().isArray()) {
+				for (FeatureStructure featureStructureItem : featureStructures) {
+					FSArray fsArray = (FSArray) featureStructureItem
+							.getFeatureValue(feature);
+					if (fsArray == null)
+						continue;
 
-  public String getValueForFeature(FeatureStructure featureStructure, Feature feature, Format format) {
-    if (format == null)
-      return featureStructure.getFeatureValueAsString(feature);
-    else {
-      Object value = null;
-      if (feature.getRange().getName().equals(CAS.TYPE_NAME_DOUBLE))
-        value = featureStructure.getDoubleValue(feature);
-      else if (feature.getRange().getName().equals(CAS.TYPE_NAME_FLOAT))
-        value = featureStructure.getFloatValue(feature);
-      else if (feature.getRange().getName().equals(CAS.TYPE_NAME_LONG))
-        value = featureStructure.getLongValue(feature);
-      else if (feature.getRange().getName().equals(CAS.TYPE_NAME_INTEGER))
-        value = featureStructure.getIntValue(feature);
-      else if (feature.getRange().getName().equals(CAS.TYPE_NAME_SHORT))
-        value = featureStructure.getShortValue(feature);
+					for (int i = 0; i < fsArray.size(); i++)
+						children.add(fsArray.get(i));
+				}
+			} else
+				for (FeatureStructure featureStructureItem : featureStructures)
+					children.add(featureStructureItem.getFeatureValue(feature));
 
-      return format.format(value);
-    }
-  }
+			currentType = feature.getRange();
+			if (currentType.isArray())
+				currentType = currentType.getComponentType();
 
-  public void reset() {
-    featureStructureIterator = null;
-    currentAnnotation = null;
-    featureFormats = Collections.emptyMap();
-    initializeIterators();
-  }
+			featureStructures.clear();
+			featureStructures.addAll(children);
+		}
 
-  public Map<String, Format> getFeatureFormats() {
-    return featureFormats;
-  }
+		return Iterators.filter(featureStructures.iterator(),
+				new NotNullPredicate<FeatureStructure>());
+	}
 
-  public JCas getJCas() {
-    return jCas;
-  }
+	protected Iterator<String> createFeatureValueIterator(
+			FeatureStructure srcFeatureStructure,
+			Collection<String> featureNames) {
+		List<String> values = new LinkedList<String>();
+		Type featureType = srcFeatureStructure.getType();
 
-  public String getFeaturePath() {
-    return featurePath;
-  }
+		if (featureNames.size() == 0)
+			values.add(currentAnnotation.getCoveredText());
 
-  public List<String> getFeatureNames() {
-    return featureNames;
-  }
+		for (String featureName : featureNames) {
+			Feature feature = featureType.getFeatureByBaseName(featureName);
+			if (feature.getRange().isArray()) {
+				StringArray fsArray = (StringArray) srcFeatureStructure
+						.getFeatureValue(feature);
+				// guard against an unset feature value, analogous to the
+				// FSArray handling in createFeatureStructureIterator()
+				if (fsArray == null)
+					continue;
+				if (featureNames.size() == 1) {
+					for (int i = 0; i < fsArray.size(); i++)
+						values.add(fsArray.get(i).toString());
+				} else {
+					String value = "";
+					for (int i = 0; i < fsArray.size(); i++) {
+						value = value.concat(fsArray.get(i).toString());
+						if (i < fsArray.size() - 1)
+							value = value.concat(delimiter);
+					}
+					values.add(value);
+				}
+			} else
+				values.add(getValueForFeature(srcFeatureStructure, feature,
+						featureFormats.get(feature.getShortName())));
+		}
+		String value = "";
+		if (delimiter != null) {
+			for (int i = 0; i < values.size(); i++) {
+				if (values.get(i) == null)
+					continue;
 
-  public String getDelimiter() {
-    return delimiter;
-  }
+				value = value.concat(values.get(i));
+				if (i < values.size() - 1)
+					value = value.concat(delimiter);
+			}
+			values.clear();
+			values.add(value);
+		}
 
-  public Type getAnnotationType() {
-    return annotationType;
-  }
+		return Iterators.filter(values.iterator(),
+				new NotNullPredicate<String>());
+	}
+
+	public String getValueForFeature(FeatureStructure featureStructure,
+			Feature feature, Format format) {
+		if (format == null)
+			return featureStructure.getFeatureValueAsString(feature);
+		else {
+			Object value = null;
+			if (feature.getRange().getName().equals(CAS.TYPE_NAME_DOUBLE))
+				value = featureStructure.getDoubleValue(feature);
+			else if (feature.getRange().getName().equals(CAS.TYPE_NAME_FLOAT))
+				value = featureStructure.getFloatValue(feature);
+			else if (feature.getRange().getName().equals(CAS.TYPE_NAME_LONG))
+				value = featureStructure.getLongValue(feature);
+			else if (feature.getRange().getName().equals(CAS.TYPE_NAME_INTEGER))
+				value = featureStructure.getIntValue(feature);
+			else if (feature.getRange().getName().equals(CAS.TYPE_NAME_SHORT))
+				value = featureStructure.getShortValue(feature);
+
+			return format.format(value);
+		}
+	}
+
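+	// Hedged example (assumed usage; "score" is a made-up feature name): a
+	// featureFormats map that renders a numeric feature with two decimals;
+	// getValueForFeature() looks the Format up under the feature's short name:
+	//
+	//   Map<String, Format> formats = new HashMap<String, Format>();
+	//   formats.put("score", new java.text.DecimalFormat("0.00"));
+	//   new AnnotationTokenStream(jCas, sofaName, "de.example.Scored",
+	//       Arrays.asList("score"), formats);
+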
+	public void reset() {
+		featureStructureIterator = null;
+		currentAnnotation = null;
+		// note: the featureFormats configured at construction time are kept
+		// so that formatting also applies after a reset
+		initializeIterators();
+	}
+
+	public Map<String, Format> getFeatureFormats() {
+		return featureFormats;
+	}
+
+	public JCas getJCas() {
+		return jCas;
+	}
+
+	public String getFeaturePath() {
+		return featurePath;
+	}
+
+	public List<String> getFeatureNames() {
+		return featureNames;
+	}
+
+	public String getDelimiter() {
+		return delimiter;
+	}
+
+	public Type getAnnotationType() {
+		return annotationType;
+	}
 
 }
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/DefaultFilterFactoryRegistry.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/DefaultFilterFactoryRegistry.java
index 34b2a93..ef8c089 100644
--- a/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/DefaultFilterFactoryRegistry.java
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/DefaultFilterFactoryRegistry.java
@@ -40,6 +40,8 @@
   public static final String LOWERCASE_FILTER_FACTORY_NAME = "lowercase";
   public static final String UEA_FILTER_FACTORY_NAME = "uea-stemmer";
   public static final String CONCAT_FILTER_FACTORY_NAME = "concatenate";
+  public static final String SELECT_FILTER_FACTORY_NAME = "select";
+  public static final String REGEXP_FILTER_FACTORY_NAME = "regexp";
 
   public Map<String, TokenFilterFactory> getDefaultRegistry(){
     Map<String, TokenFilterFactory> registry = new HashMap<String, TokenFilterFactory>();
@@ -54,6 +56,8 @@
     registry.put(UNIQUE_FILTER_FACTORY_NAME, new UniqueFilterFactory());
     registry.put(UPPERCASE_FILTER_FACTORY_NAME, new UpperCaseFilterFactory());
     registry.put(LOWERCASE_FILTER_FACTORY_NAME, new LowerCaseFilterFactory());
+    registry.put(SELECT_FILTER_FACTORY_NAME, new SelectFilterFactory());
+    registry.put(REGEXP_FILTER_FACTORY_NAME, new RegExpFilterFactory());
     return registry;
   }
   
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/RegExpFilter.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/RegExpFilter.java
new file mode 100644
index 0000000..59e1fd6
--- /dev/null
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/RegExpFilter.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+/**
+ * A {@link TokenFilter} which, for each token, matches a given regular
+ * expression <code>regExp</code> and, if found, replaces each matching span
+ * with <code>replacement</code>.
+ */
+public class RegExpFilter extends TokenFilter {
+
+	private final String replacement;
+	private final TermAttribute termAtt;
+	private final TermAttribute inputTermAtt;
+	private final Matcher m;
+
+	/**
+	 * 
+	 * @param input input <code>TokenStream</code>
+	 * @param regExp the regular expression which is matched
+	 * @param replacement the replacement string that is inserted for the span matched by <code>regExp</code>
+	 */
+	protected RegExpFilter(TokenStream input, String regExp, String replacement) {
+		super(input);
+		this.replacement = replacement;
+		// compile the pattern once; the matcher is reset to each incoming term
+		m = Pattern.compile(regExp).matcher("");
+		termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+		inputTermAtt = (TermAttribute) input.addAttribute(TermAttribute.class);
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.apache.lucene.analysis.TokenStream#incrementToken()
+	 */
+	@Override
+	public boolean incrementToken() throws IOException {
+		if (input.incrementToken()) {
+			String term = inputTermAtt.term();
+			m.reset(term);
+			termAtt.setTermBuffer(m.replaceAll(replacement));
+			return true;
+		}
+		return false;
+	}
+
+	/**
+	 * @return the replacement
+	 */
+	public String getReplacement() {
+		return replacement;
+	}
+
+	/**
+	 * @return the regexp
+	 */
+	public String getRegExp() {
+		return m.pattern().pattern();
+	}
+
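+	// Illustrative sketch (an assumption, not shipped code; the constructor is
+	// protected, so production code goes through RegExpFilterFactory):
+	// replacing every digit run in each term with a '#' placeholder:
+	//
+	//   TokenStream masked = new RegExpFilter(input, "[0-9]+", "#");
+	//
+	// Matcher.replaceAll() is used, so all matching spans of a term are
+	// replaced, not only the first one.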
+}
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/RegExpFilterFactory.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/RegExpFilterFactory.java
new file mode 100644
index 0000000..1fc1d7f
--- /dev/null
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/RegExpFilterFactory.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * {@link TokenFilterFactory} for {@link RegExpFilter}
+ *
+ */
+public class RegExpFilterFactory implements TokenFilterFactory {
+
+	public static final String REGEXP = "regexp";
+	public static final String REPLACEMENT = "replacement";
+	
+	/* (non-Javadoc)
+	 * @see org.apache.uima.lucas.indexer.analysis.TokenFilterFactory#preloadResources(java.util.Properties)
+	 */
+	public void preloadResources(Properties properties) throws IOException {
+		// nothing to preload: regexp and replacement arrive as plain properties
+	}
+
+	/* (non-Javadoc)
+	 * @see org.apache.uima.lucas.indexer.analysis.TokenFilterFactory#createTokenFilter(org.apache.lucene.analysis.TokenStream, java.util.Properties)
+	 */
+	public TokenFilter createTokenFilter(TokenStream tokenStream,
+			Properties properties) throws IOException {
+		String regexp = properties.getProperty(REGEXP);
+		String replacement = properties.getProperty(REPLACEMENT);
+		if (regexp == null || replacement == null)
+			throw new IllegalArgumentException("Attributes " + REGEXP + " and " + REPLACEMENT + " must be provided for RegExpFilter.");
+		return new RegExpFilter(tokenStream, regexp, replacement);
+	}
+
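+	// Hedged usage sketch: the factory expects its two attributes in the
+	// filter Properties, as they would come from a mapping file entry:
+	//
+	//   Properties props = new Properties();
+	//   props.setProperty(REGEXP, "[0-9]+");
+	//   props.setProperty(REPLACEMENT, "#");
+	//   TokenFilter filter = new RegExpFilterFactory()
+	//       .createTokenFilter(tokenStream, props);
+	//
+	// A missing attribute fails fast with an IllegalArgumentException.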
+}
+
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/SelectFilter.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/SelectFilter.java
new file mode 100644
index 0000000..4bb8f5f
--- /dev/null
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/SelectFilter.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+/**
+ * The opposite of a stop word filter: this filter takes a set of strings, and
+ * only incoming tokens contained in this set are returned by the filter.
+ * 
+ * @author faessler
+ */
+public class SelectFilter extends TokenFilter {
+
+	private TermAttribute termAtt;
+	private PositionIncrementAttribute posIncrAtt;
+	private CharArraySet includeWords;
+	private boolean enablePositionIncrements;
+	/**
+	 * The select filter can sometimes - for example in conjunction with the
+	 * hypernyms or synonyms filter - cause the first token of the stream to
+	 * have a position increment of 0. This must not happen, so in that case
+	 * the position increment is set to 1. This flag indicates whether we are
+	 * still at the first token.
+	 */
+	private boolean firstToken;
+
+	/**
+	 * Construct a token stream filtering the given input. If
+	 * <code>includeWords</code> is an instance of {@link CharArraySet} (true if
+	 * <code>makeStopSet()</code> was used to construct the set) it will be
+	 * directly used and <code>ignoreCase</code> will be ignored since
+	 * <code>CharArraySet</code> directly controls case sensitivity.
+	 * <p/>
+	 * If <code>includeWords</code> is not an instance of {@link CharArraySet},
+	 * a new CharArraySet will be constructed and <code>ignoreCase</code> will
+	 * be used to specify the case sensitivity of that set.
+	 * 
+	 * @param enablePositionIncrements
+	 *            true if token positions should record the removed non-include
+	 *            words
+	 * @param input
+	 *            Input TokenStream
+	 * @param includeWords
+	 *            The set of words to include.
+	 * @param ignoreCase
+	 *            ignore case when filtering
+	 */
+	public SelectFilter(boolean enablePositionIncrements, TokenStream input,
+			Set<?> includeWords, boolean ignoreCase) {
+		super(input);
+		if (includeWords instanceof CharArraySet) {
+			this.includeWords = (CharArraySet) includeWords;
+		} else {
+			this.includeWords = new CharArraySet(includeWords.size(),
+					ignoreCase);
+			this.includeWords.addAll(includeWords);
+		}
+		this.enablePositionIncrements = enablePositionIncrements;
+		termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+		posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+		firstToken = true;
+	}
+
+	/**
+	 * Advances to the next input token whose term is an included word.
+	 */
+	@Override
+	public final boolean incrementToken() throws IOException {
+		// return the first word found to be included
+		int skippedPositions = 0;
+		while (input.incrementToken()) {
+			if (includeWords.contains(termAtt.termBuffer(), 0,
+					termAtt.termLength())) {
+				// The first token must not have a position increment of 0.
+				if (posIncrAtt.getPositionIncrement() == 0 && firstToken)
+					posIncrAtt.setPositionIncrement(1);
+				if (enablePositionIncrements) {
+					posIncrAtt.setPositionIncrement(posIncrAtt
+							.getPositionIncrement() + skippedPositions);
+				}
+				firstToken = false;
+				return true;
+			}
+			skippedPositions += posIncrAtt.getPositionIncrement();
+		}
+		// reached EOS -- no more tokens
+		return false;
+	}
+
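+	// Minimal sketch (assumed usage, not from the patch): keep only the terms
+	// "electronics" and "computers", ignoring case, without recording skipped
+	// positions:
+	//
+	//   Set<?> words = StopFilter.makeStopSet(
+	//       new String[] { "electronics", "computers" }, true);
+	//   TokenStream selected = new SelectFilter(false, input, words, true);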
+}
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/SelectFilterFactory.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/SelectFilterFactory.java
new file mode 100644
index 0000000..ea8bfda
--- /dev/null
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/analysis/SelectFilterFactory.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.uima.lucas.indexer.util.PlainFileReaderFactory;
+
+import static org.apache.uima.lucas.indexer.analysis.StopwordFilterFactory.FILE_PATH_PARAMETER;
+import static org.apache.uima.lucas.indexer.analysis.StopwordFilterFactory.IGRNORE_CASE_PARAMETER;
+
+/**
+ * {@link TokenFilterFactory} for {@link SelectFilter}
+ */
+public class SelectFilterFactory implements TokenFilterFactory {
+
+	private static final Logger LOGGER = Logger.getLogger(SelectFilterFactory.class);
+
+	public static final String COVER_SUBSET_NAME = "coverSubsetName";
+	
+	private static final String TRUE = "true";
+	private static final String ENABLE_POSITION_INCREMENTS = "enablePositionIncrements";
+
+	private PlainFileReaderFactory plainFileReaderFactory;
+	private Map<String, Set<?>> cachedIncludeWords;
+
+	public SelectFilterFactory() {
+		this(new PlainFileReaderFactory());
+	}
+
+	public SelectFilterFactory(PlainFileReaderFactory plainFileReaderFactory) {
+		cachedIncludeWords = new HashMap<String, Set<?>>();
+		this.plainFileReaderFactory = plainFileReaderFactory;
+	}
+
+	public void preloadResources(Properties properties) throws IOException {
+		String filePath = properties.getProperty(FILE_PATH_PARAMETER);
+		String ignoreCaseStr = properties.getProperty(IGRNORE_CASE_PARAMETER);
+		boolean ignoreCase = ignoreCaseStr != null
+				&& ignoreCaseStr.equals(TRUE);
+		// load the include words once so they are cached for filter creation
+		getIncludeWords(filePath, ignoreCase);
+	}
+
+	public TokenFilter createTokenFilter(TokenStream tokenStream,
+			Properties properties, List<String> includeWordsList) {
+		String ignoreCaseStr = properties.getProperty(IGRNORE_CASE_PARAMETER);
+		String enablePosIncStr = properties
+				.getProperty(ENABLE_POSITION_INCREMENTS);
+		boolean ignoreCase = ignoreCaseStr != null
+				&& ignoreCaseStr.equals(TRUE);
+		boolean enablePosInc = enablePosIncStr != null
+				&& enablePosIncStr.equals(TRUE);
+
+		Set<?> includeWords = getIncludeWordsForCoverSubset(
+				properties.getProperty(COVER_SUBSET_NAME), includeWordsList,
+				ignoreCase);
+
+		return new SelectFilter(enablePosInc, tokenStream, includeWords,
+				ignoreCase);
+	}
+
+
+	public TokenFilter createTokenFilter(TokenStream tokenStream,
+			Properties properties) throws IOException {
+		String filePath = properties.getProperty(FILE_PATH_PARAMETER);
+		String ignoreCaseStr = properties.getProperty(IGRNORE_CASE_PARAMETER);
+		String enablePosIncStr = properties
+				.getProperty(ENABLE_POSITION_INCREMENTS);
+		boolean ignoreCase = ignoreCaseStr != null
+				&& ignoreCaseStr.equals(TRUE);
+		boolean enablePosInc = enablePosIncStr != null
+				&& enablePosIncStr.equals(TRUE);
+
+		Set<?> includeWords = getIncludeWords(filePath, ignoreCase);
+
+		return new SelectFilter(enablePosInc, tokenStream, includeWords,
+				ignoreCase);
+	}
+
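+	// Hedged configuration sketch: this factory reuses the stopword filter's
+	// parameter names, so a file-based setup looks like this ("include.txt"
+	// is a placeholder for a file with one include word per line):
+	//
+	//   Properties props = new Properties();
+	//   props.setProperty(FILE_PATH_PARAMETER, "include.txt");
+	//   props.setProperty(IGRNORE_CASE_PARAMETER, "true");
+	//   TokenFilter select = new SelectFilterFactory()
+	//       .createTokenFilter(tokenStream, props);
+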
+	private Set<?> getIncludeWords(String filePath, boolean ignoreCase)
+			throws IOException {
+		Set<?> includeWords = cachedIncludeWords.get(filePath);
+		if (includeWords == null) {
+			String[] includeWordLines = plainFileReaderFactory
+					.createPlainFileReader(filePath).readLines();
+			includeWords = StopFilter.makeStopSet(includeWordLines, ignoreCase);
+			LOGGER.info("file " + filePath + " loaded with "
+					+ includeWords.size() + " include words");
+			cachedIncludeWords.put(filePath, includeWords);
+		}
+
+		return includeWords;
+	}
+	
+	/**
+	 * Returns the include word set for a named cover subset, creating and
+	 * caching it on first access.
+	 * 
+	 * @param coverSubsetName
+	 *            the name of the cover subset, used as cache key
+	 * @param includeWordsList
+	 *            the terms belonging to this cover subset
+	 * @param ignoreCase
+	 *            whether the set should ignore case
+	 * @return the set of include words for the cover subset
+	 */
+	private Set<?> getIncludeWordsForCoverSubset(String coverSubsetName, List<String> includeWordsList, boolean ignoreCase) {
+		Set<?> includeWords = cachedIncludeWords.get(coverSubsetName);
+		if (includeWords == null) {
+			includeWords = StopFilter.makeStopSet(includeWordsList, ignoreCase);
+			cachedIncludeWords.put(coverSubsetName, includeWords);
+		}
+
+		return includeWords;
+	}
+}
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/FieldDescription.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/FieldDescription.java
index 9ba2d92..a2a23f7 100644
--- a/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/FieldDescription.java
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/FieldDescription.java
@@ -43,6 +43,8 @@
 	
 	private Boolean unique = false;
 
+	private TermCoverDescription termCoverDescription;
+	
 	private Collection<AnnotationDescription> annotationDescriptions;
 
 	private Collection<FilterDescription> filterDescriptions;
@@ -150,4 +152,18 @@
 	public void setUnique(Boolean unique) {
 		this.unique = unique;
 	}
+
+	/**
+	 * @return the termCoverDescription
+	 */
+	public TermCoverDescription getTermCoverDescription() {
+		return termCoverDescription;
+	}
+
+	/**
+	 * @param termCoverDescription the termCoverDescription to set
+	 */
+	public void setTermCoverDescription(TermCoverDescription termCoverDescription) {
+		this.termCoverDescription = termCoverDescription;
+	}
 }
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/MappingFileReader.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/MappingFileReader.java
index 5939252..fad1bd5 100644
--- a/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/MappingFileReader.java
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/MappingFileReader.java
@@ -37,16 +37,18 @@
  */
 public class MappingFileReader extends DefaultHandler {
 
+	public final static String FIELD = "field"; // the tag name for field definitions
 
-	public final static String FIELD = "field"; // the tag name for field definitions
+	public final static String TERM_SET_COVER_DEFINITION = "termSetCoverDefinition";
 
 	public final static String FILTER = "filter"; // filter element name
 
 	public final static String ANNOTATION = "annotation"; // the tag name for
-													// annotation definitions
+	// annotation definitions
 
 	public final static String FEATURE = "feature"; // the tag name for feature
-												// definitions
+	// definitions
 
 	private Collection<FieldDescription> fieldDescriptions;
 	private SAXParser parser;
@@ -55,8 +57,9 @@
 	private AnnotationDescription currentAnnotationDescription;
 	private Locator currentLocator;
 	private Map<String, ElementMapper<?>> elementMappers;
-	
-	public MappingFileReader(SAXParser parser, Map<String, ElementMapper<?>> elementMappers) throws IOException {
+
+	public MappingFileReader(SAXParser parser,
+			Map<String, ElementMapper<?>> elementMappers) throws IOException {
 		super();
 		fieldDescriptions = new ArrayList<FieldDescription>();
 		this.parser = parser;
@@ -71,11 +74,11 @@
 	}
 
 	private void parseFile(File mappingFile) throws IOException, SAXException {
-//		try {
-			parser.parse(mappingFile, this);
-//		} catch (SAXException e) {
-//			throw new IOException(e);
-//		}
+		parser.parse(mappingFile, this);
 	}
 
 	@Override
@@ -88,6 +91,8 @@
 			Attributes attributes) throws SAXException {
 		if (qName.equals(FIELD))
 			addFieldDescription(attributes);
+		else if (qName.equals(TERM_SET_COVER_DEFINITION))
+			addTermSetCoverDefinition(attributes);
 		else if (qName.equals(FILTER))
 			addFilterDescription(attributes);
 		else if (qName.equals(ANNOTATION))
@@ -97,45 +102,69 @@
 	}
 
 	private void addFieldDescription(Attributes attributes) {
-		ElementMapper<FieldDescription> elementMapper = (ElementMapper<FieldDescription>) elementMappers.get(FIELD);
+		ElementMapper<FieldDescription> elementMapper = (ElementMapper<FieldDescription>) elementMappers
+				.get(FIELD);
 		currentFieldDescription = elementMapper.mapElement(attributes);
 		currentAnnotationDescription = null;
 		mapLocator(currentFieldDescription);
 		fieldDescriptions.add(currentFieldDescription);
 	}
 
+	private void addTermSetCoverDefinition(Attributes attributes) {
+		if (currentFieldDescription == null)
+			throw new IllegalStateException(
+					"A termSetCoverDefinition element must be nested inside a field element.");
+		ElementMapper<TermCoverDescription> termSetCoverDefinitionMapper = (ElementMapper<TermCoverDescription>) elementMappers
+				.get(TERM_SET_COVER_DEFINITION);
+		TermCoverDescription termCoverDescription = termSetCoverDefinitionMapper
+				.mapElement(attributes);
+		if (null != currentFieldDescription.getTermCoverDescription())
+			throw new IllegalStateException(
+					"Only one term cover definition per field is allowed; field \""
+							+ currentFieldDescription.getName()
+							+ "\" has at least two.");
+		mapLocator(termCoverDescription);
+		termCoverDescription.setFieldName(currentFieldDescription.getName());
+		currentFieldDescription.setTermCoverDescription(termCoverDescription);
+	}
+
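+	// For illustration only (the attribute names are assumptions derived from
+	// the TermCoverDescription accessors, not verified against lucas.xsd): a
+	// field with a nested term set cover definition might look like
+	//
+	//   <field name="products" index="yes">
+	//     <termSetCoverDefinition coverDefinitionFile="cover.txt" ... />
+	//     <annotation type="de.example.Product" ... />
+	//   </field>
+	//
+	// A second termSetCoverDefinition within the same field triggers the
+	// IllegalStateException above.
+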
 	private void addFilterDescription(Attributes attributes) {
-		ElementMapper<FilterDescription> filterMapper = (ElementMapper<FilterDescription>) elementMappers.get(FILTER);
-		FilterDescription filterDescription = filterMapper.mapElement(attributes);
+		ElementMapper<FilterDescription> filterMapper = (ElementMapper<FilterDescription>) elementMappers
+				.get(FILTER);
+		FilterDescription filterDescription = filterMapper
+				.mapElement(attributes);
 		mapLocator(filterDescription);
-		if (currentAnnotationDescription != null ){
-			Collection<FilterDescription> filterDescriptions = currentAnnotationDescription.getFilterDescriptions();
+		if (currentAnnotationDescription != null) {
+			Collection<FilterDescription> filterDescriptions = currentAnnotationDescription
+					.getFilterDescriptions();
 			filterDescriptions.add(filterDescription);
-		}
-		else{
-			Collection<FilterDescription> filterDescriptions = currentFieldDescription.getFilterDescriptions();
+		} else {
+			Collection<FilterDescription> filterDescriptions = currentFieldDescription
+					.getFilterDescriptions();
 			filterDescriptions.add(filterDescription);
 		}
 	}
 
 	private void addAnnotationDescription(Attributes attributes) {
-		ElementMapper<AnnotationDescription> annotationMapper = (ElementMapper<AnnotationDescription>) elementMappers.get(ANNOTATION);
+		ElementMapper<AnnotationDescription> annotationMapper = (ElementMapper<AnnotationDescription>) elementMappers
+				.get(ANNOTATION);
 		currentAnnotationDescription = annotationMapper.mapElement(attributes);
 		mapLocator(currentAnnotationDescription);
-		Collection<AnnotationDescription> annotationDescriptions = currentFieldDescription.getAnnotationDescriptions();
+		Collection<AnnotationDescription> annotationDescriptions = currentFieldDescription
+				.getAnnotationDescriptions();
 		annotationDescriptions.add(currentAnnotationDescription);
 	}
 
 	private void addFeatureDescription(Attributes attributes) {
-		ElementMapper<FeatureDescription> featureMapper = (ElementMapper<FeatureDescription>) elementMappers.get(FEATURE);
-		FeatureDescription featureDescription = featureMapper.mapElement(attributes);
+		ElementMapper<FeatureDescription> featureMapper = (ElementMapper<FeatureDescription>) elementMappers
+				.get(FEATURE);
+		FeatureDescription featureDescription = featureMapper
+				.mapElement(attributes);
 		mapLocator(featureDescription);
-		Collection<FeatureDescription> featureDescriptions = currentAnnotationDescription.getFeatureDescriptions();
+		Collection<FeatureDescription> featureDescriptions = currentAnnotationDescription
+				.getFeatureDescriptions();
 		featureDescriptions.add(featureDescription);
 	}
 
 	private void mapLocator(Locateable locateable) {
-		if (currentLocator!=null){
+		if (currentLocator != null) {
 			int lineNumber = currentLocator.getLineNumber();
 			locateable.setLineNumber(lineNumber);
 			int columnNumber = currentLocator.getColumnNumber();
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverBuilder.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverBuilder.java
new file mode 100644
index 0000000..2a03824
--- /dev/null
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverBuilder.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.lucas.indexer.mapping;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.uima.lucas.indexer.analysis.SelectFilter;
+import org.apache.uima.lucas.indexer.analysis.SelectFilterFactory;
+
+/**
+ * <p>
+ * Constructs <tt>TokenStream</tt>s according to a cover definition.
+ * </p>
+ * <p>
+ * This class uses {@link SelectFilter}s to build <tt>TokenStream</tt>s which
+ * only output terms that pass the <tt>SelectFilter</tt>. Each
+ * <tt>TokenStream</tt> corresponds to one cover set and will only output terms
+ * included in this particular set. This mechanism can be used to dynamically
+ * create multiple fields according to a definition of which terms may be
+ * included in which field.<br/>
+ * To create an instance of this class, the {@link TermCoverBuilderFactory}
+ * should be used as it caches the cover definition. For further explanation of
+ * the nature of these covers and their required file format, please refer to
+ * the factory class.
+ * </p>
+ * 
+ * @see TermCoverBuilderFactory
+ *
+ */
+public class TermCoverBuilder {
+
+	private final Map<String, List<String>> vocabularyCover;
+	private Iterator<String> subsetIterator;
+	private String currentCoverSubsetName;
+	private TokenStream currentCoverSubsetTokenStream;
+	private SelectFilterFactory selectFilterFactory;
+	private TokenStream tokenStream;
+	private Properties properties;
+	private final TermCoverDescription termCoverDescription;
+
+	/**
+	 * @param vocabularyCover
+	 *            maps each cover subset name to the terms contained in that
+	 *            subset
+	 * @param tokenStream
+	 *            the input token stream whose terms are distributed over the
+	 *            cover subsets
+	 * @param termCoverDescription
+	 *            the cover definition read from the mapping file
+	 * @param selectFilterFactory
+	 *            factory for the {@link SelectFilter}s which realize the
+	 *            subsets
+	 */
+	public TermCoverBuilder(Map<String, List<String>> vocabularyCover,
+			TokenStream tokenStream, TermCoverDescription termCoverDescription,
+			SelectFilterFactory selectFilterFactory) {
+		this.vocabularyCover = vocabularyCover;
+		this.tokenStream = tokenStream;
+		this.termCoverDescription = termCoverDescription;
+		this.selectFilterFactory = selectFilterFactory;
+		properties = new Properties();
+		properties.put("enablePositionIncrements", "false");
+		properties.put("ignoreCase",
+				termCoverDescription.getIgnoreCaseOfSelectedTerms() ? "true"
+						: "false");
+		subsetIterator = vocabularyCover.keySet().iterator();
+	}
+
+	public boolean increaseCoverSubset() {
+		if (subsetIterator.hasNext()) {
+			currentCoverSubsetName = subsetIterator.next();
+			properties.put(SelectFilterFactory.COVER_SUBSET_NAME, getCoverSubsetName());
+			currentCoverSubsetTokenStream = selectFilterFactory
+					.createTokenFilter(tokenStream, properties,
+							vocabularyCover.get(currentCoverSubsetName));
+			return true;
+		}
+		return false;
+	}
+
+	public String getCoverSubsetName() {
+		String fieldName = termCoverDescription.getFieldName();
+		String combineFieldName = termCoverDescription
+				.getGenerateFieldNameMethod();
+		if (combineFieldName.equals(TermCoverMapper.USE_SUBSET_NAME_APPEND))
+			fieldName = fieldName + currentCoverSubsetName;
+		else if (combineFieldName.equals(TermCoverMapper.USE_SUBSET_NAME_PREPEND))
+			fieldName = currentCoverSubsetName + fieldName;
+		else if (combineFieldName.equals(TermCoverMapper.USE_SUBSET_NAME_REPLACE))
+			fieldName = currentCoverSubsetName;
+		return fieldName;
+	}
+
+	public TokenStream getPartitionTokenStream() {
+		return currentCoverSubsetTokenStream;
+	}
+
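+	// Hedged driver sketch showing the intended iteration pattern (the actual
+	// calling code is outside this excerpt):
+	//
+	//   TermCoverBuilder builder = termCoverBuilderFactory
+	//       .createTermCoverBuilder(tokenStream, termCoverDescription);
+	//   while (builder.increaseCoverSubset()) {
+	//     String fieldName = builder.getCoverSubsetName();
+	//     TokenStream subsetStream = builder.getPartitionTokenStream();
+	//     // index subsetStream under fieldName
+	//   }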
+}
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverBuilderFactory.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverBuilderFactory.java
new file mode 100644
index 0000000..2080ef5
--- /dev/null
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverBuilderFactory.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.mapping;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.uima.lucas.indexer.analysis.SelectFilterFactory;
+
+/**
+ * <p>
+ * This factory essentially serves as a caching solution for the files which
+ * define term set covers. Each cover definition file is read once and its
+ * contents are stored in a map. The name of each set of the cover is mapped to
+ * the list of terms included in this set.<br/>
+ * The file format is<br/>
+ * &lt;term&gt;=&lt;set1&gt;|&lt;set2&gt;|...|&lt;setN&gt;<br/>
+ * For example, the file with the following contents:
+ * <p>
+ * <samp>
+ * light bulb=electronics<br/>
+ * electric shaver=electronics|sanitaryArticles<br/>
+ * smartphone=electronics|computers<br/>
+ * </samp>
+ * </p>
+ * will result in three cover set names <tt>electronics</tt>,
+ * <tt>sanitaryArticles</tt> and <tt>computers</tt>. These will be mapped to
+ * term lists such that <tt>electronics</tt> contains <tt>light bulb</tt>,
+ * <tt>electric shaver</tt> and <tt>smartphone</tt>, <tt>sanitaryArticles</tt>
+ * only contains <tt>electric shaver</tt>, and <tt>computers</tt> only
+ * includes <tt>smartphone</tt>.
+ * </p>
+ * <p>
+ * Each such file is modeled as one instance of {@link TermCoverBuilder},
+ * which then distributes the terms arriving at the field defined with a cover
+ * onto the cover fields according to the cover definition.
+ * </p>
+ */
+public class TermCoverBuilderFactory {
+
+	private final Map<String, Map<String, List<String>>> cachedVocabularyPartitions;
+	private final SelectFilterFactory selectFilterFactory;
+
+	public TermCoverBuilderFactory() {
+		cachedVocabularyPartitions = new HashMap<String, Map<String, List<String>>>();
+		selectFilterFactory = new SelectFilterFactory();
+	}
+
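+	/**
+	 * Creates a {@link TermCoverBuilder} for the given token stream and term
+	 * cover description. The cover definition file is parsed on first use;
+	 * subsequent calls with the same file name are served from the cache.
+	 */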
+	public TermCoverBuilder createTermCoverBuilder(TokenStream tokenStream,
+			TermCoverDescription termCoverDescription) {
+		String fileName = termCoverDescription.getCoverDefinitionFile();
+		Map<String, List<String>> vocabularyCover = cachedVocabularyPartitions
+				.get(fileName);
+		if (vocabularyCover == null) {
+			vocabularyCover = new HashMap<String, List<String>>();
+
+			BufferedReader br = null;
+			try {
+				FileReader fr = new FileReader(fileName);
+				br = new BufferedReader(fr);
+				String line;
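+				// Each line of a cover definition file has the form
+				// <term>=<set1>|<set2>|...|<setN>; the term is added to the
+				// vocabulary list of every subset named on its line.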
+				while ((line = br.readLine()) != null) {
+					String[] keyValue = line.split("=");
+					String term = keyValue[0];
+					String[] subsetNames = keyValue[1].split("\\|");
+					for (String subsetName : subsetNames) {
+						List<String> vocabularyList = vocabularyCover
+								.get(subsetName);
+						if (vocabularyList == null) {
+							vocabularyList = new ArrayList<String>();
+							vocabularyCover.put(subsetName, vocabularyList);
+						}
+						vocabularyList.add(term);
+					}
+				}
+			} catch (FileNotFoundException e) {
+				e.printStackTrace();
+			} catch (IOException e) {
+				e.printStackTrace();
+			} finally {
+				try {
+					if (br != null)
+						br.close();
+				} catch (IOException e) {
+					e.printStackTrace();
+				}
+			}
+			cachedVocabularyPartitions.put(fileName, vocabularyCover);
+		}
+
+		return new TermCoverBuilder(vocabularyCover, tokenStream,
+				termCoverDescription, selectFilterFactory);
+	}
+
+}
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverDescription.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverDescription.java
new file mode 100644
index 0000000..8648d7e
--- /dev/null
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverDescription.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.mapping;
+
+/**
+ * A description of a term set cover to be applied to a field, corresponding
+ * to a <tt>termSetCoverDefinition</tt> element in the mapping file.
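+ * <p>
+ * For example, a corresponding mapping file element might look as follows
+ * (the cover definition file path is illustrative):
+ * <pre>
+ * &lt;termSetCoverDefinition coverDefinitionFile="pathToACoverDefinitionFile"
+ *     generateFieldNameMethod="append" ignoreCaseOfSelectedTerms="true" /&gt;
+ * </pre>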
+ */
+public class TermCoverDescription implements Locateable {
+
+	private String coverDefinitionFile;
+
+	private String generateFieldNameMethod;
+
+	private boolean ignoreCaseOfSelectedTerms;
+
+	private String fieldName;
+
+	private int line;
+
+	private int column;
+
+	/**
+	 * Creates a description with defaults: <tt>append</tt> field name
+	 * generation and case-insensitive term matching.
+	 */
+	public TermCoverDescription() {
+		generateFieldNameMethod = TermCoverMapper.USE_SUBSET_NAME_APPEND;
+		// Let's set the default value to true; this way, one can at least see
+		// that there might be too many terms in a field. It's always hard to
+		// see what's NOT there.
+		ignoreCaseOfSelectedTerms = true;
+	}
+
+	/**
+	 * @return the coverDefinitionFile
+	 */
+	public String getCoverDefinitionFile() {
+		return coverDefinitionFile;
+	}
+
+	/**
+	 * @param coverDefinitionFile
+	 *            the coverDefinitionFile to set
+	 */
+	public void setCoverDefinitionFile(String coverDefinitionFile) {
+		this.coverDefinitionFile = coverDefinitionFile;
+	}
+
+	/**
+	 * @see #getGenerateFieldNameMethod()
+	 * @param generateFieldNameMethod
+	 *            the field name generation method to set
+	 */
+	public void setGenerateFieldNameMethod(String generateFieldNameMethod) {
+		this.generateFieldNameMethod = generateFieldNameMethod;
+	}
+
+	/**
+	 * Returns a string identifying the method of dynamic field name generation.
+	 * For valid values, see the constants referenced below.
+	 * 
+	 * @see TermCoverMapper#USE_SUBSET_NAME_APPEND
+	 * @see TermCoverMapper#USE_SUBSET_NAME_PREPEND
+	 * @see TermCoverMapper#USE_SUBSET_NAME_REPLACE
+	 * @return the method for combination of the cover subset and the field name
+	 *         defined in the mapping file (prepending, appending, replacing)
+	 */
+	public String getGenerateFieldNameMethod() {
+		return generateFieldNameMethod;
+	}
+
+	/**
+	 * @param ignoreCase
+	 *            whether the case of selected terms is ignored when matching
+	 */
+	public void setIgnoreCaseOfSelectedTerms(boolean ignoreCase) {
+		this.ignoreCaseOfSelectedTerms = ignoreCase;
+	}
+
+	/**
+	 * @return whether the case of selected terms is ignored
+	 */
+	public boolean getIgnoreCaseOfSelectedTerms() {
+		return ignoreCaseOfSelectedTerms;
+	}
+
+	/**
+	 * @return the fieldName
+	 */
+	public String getFieldName() {
+		return fieldName;
+	}
+
+	/**
+	 * @param fieldName
+	 *            the fieldName to set
+	 */
+	public void setFieldName(String fieldName) {
+		this.fieldName = fieldName;
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.apache.uima.lucas.indexer.mapping.Locateable#setLineNumber(int)
+	 */
+	public void setLineNumber(int lineNumber) {
+		line = lineNumber;
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.apache.uima.lucas.indexer.mapping.Locateable#getLineNumber()
+	 */
+	public int getLineNumber() {
+		return line;
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see
+	 * org.apache.uima.lucas.indexer.mapping.Locateable#setColumnNumber(int)
+	 */
+	public void setColumnNumber(int columnNumber) {
+		column = columnNumber;
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.apache.uima.lucas.indexer.mapping.Locateable#getColumnNumber()
+	 */
+	public int getColumnNumber() {
+		return column;
+	}
+}
diff --git a/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverMapper.java b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverMapper.java
new file mode 100644
index 0000000..195897c
--- /dev/null
+++ b/Lucas/src/main/java/org/apache/uima/lucas/indexer/mapping/TermCoverMapper.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.mapping;
+
+import org.xml.sax.Attributes;
+
+/**
 * An {@link ElementMapper} for {@link TermCoverDescription}s.
+ *
+ */
+public class TermCoverMapper implements ElementMapper<TermCoverDescription> {
+
+	private static final String COVER_DEFINITION_FILE = "coverDefinitionFile";
+
+	private static final String GENERATE_FIELDNAME_METHOD = "generateFieldNameMethod";
+
+	private static final String IGNORE_CASE_OF_SELECTED_TERMS = "ignoreCaseOfSelectedTerms";
+
+	/**
+	 * Indicates that the cover subset name is appended to the field name
+	 * specified in the mapping file to generate the field name for Lucene
+	 * documents.
+	 */
+	public static final String USE_SUBSET_NAME_APPEND = "append";
+
+	/**
+	 * Indicates that the cover subset name is prepended to the field name
+	 * specified in the mapping file to generate the field name for Lucene
+	 * documents.
+	 */
+	public static final String USE_SUBSET_NAME_PREPEND = "prepend";
+
+	/**
+	 * Indicates that the field name in the mapping file is replaced by the
+	 * cover subset name to generate the field name for Lucene documents.
+	 */
+	public static final String USE_SUBSET_NAME_REPLACE = "replace";
+
+	private static final String TRUE = "true";
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see
+	 * org.apache.uima.lucas.indexer.mapping.ElementMapper#mapElement(org.xml
+	 * .sax.Attributes)
+	 */
+	public TermCoverDescription mapElement(Attributes attributes) {
+		TermCoverDescription termCoverDescription = new TermCoverDescription();
+		for (int i = 0; i < attributes.getLength(); i++) {
+			String name = attributes.getQName(i);
+			String value = attributes.getValue(i);
+
+			if (name.equals(COVER_DEFINITION_FILE)) {
+				termCoverDescription.setCoverDefinitionFile(value);
+			} else if (name.equals(GENERATE_FIELDNAME_METHOD)) {
+				if (!value.equals(USE_SUBSET_NAME_APPEND)
+						&& !value.equals(USE_SUBSET_NAME_PREPEND)
+						&& !value.equals(USE_SUBSET_NAME_REPLACE))
+					throw new IllegalArgumentException(
+							"The method to combine the original field name" +
+							" with a cover subset name to generate the final Lucene field name must be one of \""
+									+ USE_SUBSET_NAME_APPEND
+									+ "\", \""
+									+ USE_SUBSET_NAME_PREPEND
+									+ "\" or \""
+									+ USE_SUBSET_NAME_REPLACE
+									+ "\" but was: \"" + value + "\"");
+				termCoverDescription.setGenerateFieldNameMethod(value);
+			} else if (name.equals(IGNORE_CASE_OF_SELECTED_TERMS)) {
+				if (value != null && value.equals(TRUE))
+					termCoverDescription.setIgnoreCaseOfSelectedTerms(true);
+			}
+		}
+		return termCoverDescription;
+	}
+}
diff --git a/Lucas/src/main/resources/lucas.xsd b/Lucas/src/main/resources/lucas.xsd
index 1d8c41f..a7b0af9 100644
--- a/Lucas/src/main/resources/lucas.xsd
+++ b/Lucas/src/main/resources/lucas.xsd
@@ -1,143 +1,156 @@
 <?xml version="1.0"?>
 
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	you under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
 
 <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
-		   elementFormDefault="qualified">
+	elementFormDefault="qualified">
 
 	<xs:simpleType name="indexType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="yes"/>
-    		<xs:enumeration value="no"/>
-    		<xs:enumeration value="no_norms"/>
-    		<xs:enumeration value="no_tf"/>
-    		<xs:enumeration value="no_norms_tf"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="yes" />
+			<xs:enumeration value="no" />
+			<xs:enumeration value="no_norms" />
+			<xs:enumeration value="no_tf" />
+			<xs:enumeration value="no_norms_tf" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="termVectorType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="no"/>
-    		<xs:enumeration value="positions"/>
-    		<xs:enumeration value="offsets"/>
-    		<xs:enumeration value="positions_offsets"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="no" />
+			<xs:enumeration value="positions" />
+			<xs:enumeration value="offsets" />
+			<xs:enumeration value="positions_offsets" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="storedType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="yes"/>
-    		<xs:enumeration value="no"/>
-    		<xs:enumeration value="compress"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="yes" />
+			<xs:enumeration value="no" />
+			<xs:enumeration value="compress" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="tokenizerType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="cas"/>
-    		<xs:enumeration value="whitespace"/>
-    		<xs:enumeration value="standard"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="cas" />
+			<xs:enumeration value="whitespace" />
+			<xs:enumeration value="standard" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="positionType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="first"/>
-    		<xs:enumeration value="last"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="first" />
+			<xs:enumeration value="last" />
+		</xs:restriction>
+	</xs:simpleType>
+	
+	<xs:simpleType name="generateCoverFieldNameType">
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="append" />
+			<xs:enumeration value="prepend" />
+			<xs:enumeration value="replace" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:element name="filters">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="filter" minOccurs="1" maxOccurs="unbounded"/>
+				<xs:element ref="filter" minOccurs="1" maxOccurs="unbounded" />
 			</xs:sequence>
 		</xs:complexType>
 	</xs:element>
-	
+
 	<xs:element name="filter">
 		<xs:complexType>
-			<xs:attribute name="className" type="xs:string"/>
-			<xs:attribute name="factoryClassName" type="xs:string"/>
-			<xs:attribute name="reuseFactory" type="xs:boolean"/>
-			<xs:attribute name="name" type="xs:string"/>
-			<xs:anyAttribute processContents="lax"/>
-		</xs:complexType>				
+			<xs:attribute name="className" type="xs:string" />
+			<xs:attribute name="factoryClassName" type="xs:string" />
+			<xs:attribute name="reuseFactory" type="xs:boolean" />
+			<xs:attribute name="name" type="xs:string" />
+			<xs:anyAttribute processContents="lax" />
+		</xs:complexType>
 	</xs:element>
-		
+
 	<xs:element name="features">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="feature" minOccurs="1" maxOccurs="unbounded"/>
+				<xs:element ref="feature" minOccurs="1" maxOccurs="unbounded" />
 			</xs:sequence>
 		</xs:complexType>
 	</xs:element>
-	
+
 	<xs:element name="feature">
 		<xs:complexType>
-			<xs:attribute name="name" type="xs:string"/>
-			<xs:attribute name="numberFormat" type="xs:string"/>
+			<xs:attribute name="name" type="xs:string" />
+			<xs:attribute name="numberFormat" type="xs:string" />
 		</xs:complexType>
 	</xs:element>
 
 	<xs:element name="annotations">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="annotation" minOccurs="1" maxOccurs="unbounded"/>
+				<xs:element ref="annotation" minOccurs="1" maxOccurs="unbounded" />
 			</xs:sequence>
 		</xs:complexType>
 	</xs:element>
-	
+
 	<xs:element name="annotation">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="filters" minOccurs="0" maxOccurs="1"/>
-				<xs:element ref="features" minOccurs="1" maxOccurs="1"/>
-			</xs:sequence>				
-			<xs:attribute name="type" type="xs:string"/>
-			<xs:attribute name="sofa" type="xs:string"/>
-			<xs:attribute name="featurePath" type="xs:string"/>
-			<xs:attribute name="tokenizer" type="tokenizerType"/>	
-			<xs:attribute name="featureValueDelimiterString" type="xs:string"/>		
+				<xs:element ref="filters" minOccurs="0" maxOccurs="1" />
+				<xs:element ref="features" minOccurs="0" maxOccurs="1" />
+			</xs:sequence>
+			<xs:attribute name="type" type="xs:string" />
+			<xs:attribute name="sofa" type="xs:string" />
+			<xs:attribute name="featurePath" type="xs:string" />
+			<xs:attribute name="tokenizer" type="tokenizerType" />
+			<xs:attribute name="featureValueDelimiterString" type="xs:string" />
+		</xs:complexType>
+	</xs:element>
+
+	<xs:element name="termSetCoverDefinition">
+		<xs:complexType>
+			<xs:attribute name="coverDefinitionFile" type="xs:string" />
+			<xs:attribute name="generateFieldNameMethod" type="generateCoverFieldNameType" />
+			<xs:attribute name="ignoreCaseOfSelectedTerms" type="xs:boolean" />
 		</xs:complexType>
 	</xs:element>
 
 	<xs:element name="field">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="filters" minOccurs="0" maxOccurs="1"/>
-				<xs:element ref="annotations" minOccurs="1" maxOccurs="1"/>
-			</xs:sequence>				
-			<xs:attribute name="name" type="xs:string"/>
-			<xs:attribute name="index" type="indexType"/>
-			<xs:attribute name="termVector" type="termVectorType"/>
-			<xs:attribute name="delimiter" type="xs:string"/>
-			<xs:attribute name="stored" type="storedType"/>
-			<xs:attribute name="merge" type="xs:boolean"/>			
+				<xs:element ref="termSetCoverDefinition" minOccurs="0"
+					maxOccurs="1" />
+				<xs:element ref="filters" minOccurs="0" maxOccurs="1" />
+				<xs:element ref="annotations" minOccurs="1" maxOccurs="1" />
+			</xs:sequence>
+			<xs:attribute name="name" type="xs:string" />
+			<xs:attribute name="index" type="indexType" />
+			<xs:attribute name="termVector" type="termVectorType" />
+			<xs:attribute name="delimiter" type="xs:string" />
+			<xs:attribute name="stored" type="storedType" />
+			<xs:attribute name="merge" type="xs:boolean" />
+			<xs:attribute name="unique" type="xs:boolean" />
+			<xs:attribute name="coverFile" type="xs:string" />
+
 		</xs:complexType>
 	</xs:element>
-	
+
 	<xs:element name="fields">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="field" maxOccurs="unbounded"/>
+				<xs:element ref="field" maxOccurs="unbounded" />
 			</xs:sequence>
 		</xs:complexType>
 	</xs:element>
diff --git a/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/DefaultFilterFactoryRegistryTest.java b/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/DefaultFilterFactoryRegistryTest.java
index 34cdaae..b8c5918 100644
--- a/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/DefaultFilterFactoryRegistryTest.java
+++ b/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/DefaultFilterFactoryRegistryTest.java
@@ -19,13 +19,13 @@
 
 package org.apache.uima.lucas.indexer.analysis;
 
-import static org.junit.Assert.*;
-
 import java.util.Map;
 
 import org.junit.Before;
 import org.junit.Test;
 
+import static org.junit.Assert.assertEquals;
+
 public class DefaultFilterFactoryRegistryTest {
 
   private DefaultFilterFactoryRegistry defaultFilterFactoryRegistry;
@@ -38,7 +38,7 @@
   @Test
   public void testGetDefaultFilterFactories() {
     Map<String, TokenFilterFactory> registry = defaultFilterFactoryRegistry.getDefaultRegistry();
-    assertEquals(11, registry.size());
+    assertEquals(13, registry.size());
     
     TokenFilterFactory filterFactory = registry.get(DefaultFilterFactoryRegistry.ADDITION_FILTER_FACTORY_NAME);
     assertEquals(AdditionFilterFactory.class, filterFactory.getClass());
@@ -72,6 +72,12 @@
 
     filterFactory = registry.get(DefaultFilterFactoryRegistry.LOWERCASE_FILTER_FACTORY_NAME);
     assertEquals(LowerCaseFilterFactory.class, filterFactory.getClass());
+    
+    filterFactory = registry.get(DefaultFilterFactoryRegistry.SELECT_FILTER_FACTORY_NAME);
+    assertEquals(SelectFilterFactory.class, filterFactory.getClass());
+    
+    filterFactory = registry.get(DefaultFilterFactoryRegistry.REGEXP_FILTER_FACTORY_NAME);
+    assertEquals(RegExpFilterFactory.class, filterFactory.getClass());
   }
 
 }
diff --git a/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/RegExpFilterFactoryTest.java b/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/RegExpFilterFactoryTest.java
new file mode 100644
index 0000000..6ae1a37
--- /dev/null
+++ b/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/RegExpFilterFactoryTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+
+import java.util.Properties;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.uima.lucas.indexer.test.util.DummyTokenStream;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Testcase for {@link RegExpFilterFactory}
+ *
+ */
+public class RegExpFilterFactoryTest {
+	private final static String TEST_REGEXP = "[a-z][0-9]";
+	private final static String TEST_REPLACEMENT = "anything";
+	private RegExpFilterFactory regExpFilterFactory;
+	private TokenStream tokenStream;
+	
+	@Before
+	public void setUp(){
+		tokenStream = new DummyTokenStream("dummy", 1, 1, 0);
+		regExpFilterFactory = new RegExpFilterFactory();
+	}
+	
+	@Test
+	public void testCreateTokenFilter() throws Exception{
+		Properties properties = new Properties();
+		properties.setProperty(RegExpFilterFactory.REGEXP, TEST_REGEXP);
+		properties.setProperty(RegExpFilterFactory.REPLACEMENT, TEST_REPLACEMENT);
+		
+		RegExpFilter regExpFilter = (RegExpFilter) regExpFilterFactory.createTokenFilter(tokenStream, properties);
+		
+		assertEquals(TEST_REGEXP, regExpFilter.getRegExp());
+		assertEquals(TEST_REPLACEMENT, regExpFilter.getReplacement());
+	}
+}
+
diff --git a/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/RegExpFilterTest.java b/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/RegExpFilterTest.java
new file mode 100644
index 0000000..57f6345
--- /dev/null
+++ b/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/RegExpFilterTest.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author faessler
+ *
+ */
+public class RegExpFilterTest {
+	@Test
+	public void testIncrementToken() throws Exception{
+		Tokenizer ts = new WhitespaceTokenizer(new StringReader("token1 token2 token3 token42"));
+
+		RegExpFilter filter = new RegExpFilter(ts, "[a-z][0-9]", "REPL");
+		
+		TermAttribute termAtt = (TermAttribute) filter.addAttribute(TermAttribute.class);
+		
+		filter.incrementToken();
+		assertEquals("tokeREPL", termAtt.term());
+		filter.incrementToken();
+		assertEquals("tokeREPL", termAtt.term());
+		filter.incrementToken();
+		assertEquals("tokeREPL", termAtt.term());
+		filter.incrementToken();
+		assertEquals("tokeREPL2", termAtt.term());
+	}
+}
+
diff --git a/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/SelectFilterFactoryTest.java b/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/SelectFilterFactoryTest.java
new file mode 100644
index 0000000..144e568
--- /dev/null
+++ b/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/SelectFilterFactoryTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.util.Properties;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.uima.lucas.indexer.test.util.DummyTokenStream;
+import org.junit.Test;
+
+import junit.framework.TestCase;
+
+import static org.apache.uima.lucas.indexer.analysis.StopwordFilterFactory.FILE_PATH_PARAMETER;
+
+/**
+ * Testcase for {@link SelectFilterFactory}
+ *
+ */
+public class SelectFilterFactoryTest extends TestCase {
+
+	@Test
+	public void test() throws IOException, SecurityException, NoSuchFieldException, IllegalArgumentException, IllegalAccessException {
+		
+		SelectFilterFactory selectFilterFactory = new SelectFilterFactory();
+		Properties properties = new Properties();
+		properties.put(FILE_PATH_PARAMETER, "src/test/resources/selectedwords.txt");
+		TokenFilter selectFilter = selectFilterFactory.createTokenFilter(new DummyTokenStream("dummy", 0, 0, 0), properties);
+		assertNotNull(selectFilter);
+		
+		Field includeWordsField = selectFilter.getClass().getDeclaredField("includeWords");
+		includeWordsField.setAccessible(true);
+		CharArraySet set = (CharArraySet) includeWordsField.get(selectFilter);
+		assertTrue("'positive1' is not in set", set.contains("positive1"));
+		assertTrue("'positive2' is not in set", set.contains("positive2"));
+		assertTrue("'positive3' is not in set", set.contains("positive3"));
+	}
+	
+}
+
diff --git a/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/SelectFilterTest.java b/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/SelectFilterTest.java
new file mode 100644
index 0000000..b233e8a
--- /dev/null
+++ b/Lucas/src/test/java/org/apache/uima/lucas/indexer/analysis/SelectFilterTest.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.lucas.indexer.analysis;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.uima.lucas.indexer.test.util.CollectionTokenStream;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+import junit.framework.TestCase;
+
+import static org.apache.uima.lucas.indexer.util.TokenFactory.newToken;
+
+/**
+ * Testcase for {@link SelectFilter}
+ */
+public class SelectFilterTest extends TestCase {
+
+	private Set<String> selectedWords;
+	private CollectionTokenStream inputTokenStream;
+
+	@Before
+	public void setUp() {
+		selectedWords = new HashSet<String>();
+		selectedWords.add("positive1");
+		selectedWords.add("positive2");
+		selectedWords.add("positive3");
+
+		Token t1 = newToken("negative1", 0, 9);
+		Token t2 = newToken("positive3", 9, 18);
+		Token t3 = newToken("positive1", 18, 27);
+		Token t4 = newToken("negative2", 27, 36);
+		Token t5 = newToken("positive2", 36, 45);
+		Token t6 = newToken("negative3", 45, 54);
+		Token t7 = newToken("negative4", 54, 63);
+		inputTokenStream = new CollectionTokenStream(Lists.newArrayList(t1, t2,
+				t3, t4, t5, t6, t7));
+	}
+
+	@Test
+	public void testWithEnablePositionIncrement() throws IOException {
+		inputTokenStream.reset();
+
+		SelectFilter selectFilter = new SelectFilter(true, inputTokenStream,
+				selectedWords, true);
+
+		TermAttribute tAtt = (TermAttribute) selectFilter
+				.addAttribute(TermAttribute.class);
+		PositionIncrementAttribute posAtt = (PositionIncrementAttribute) selectFilter
+				.addAttribute(PositionIncrementAttribute.class);
+		selectFilter.incrementToken();
+		assertEquals("Selected Token 1", "positive3", tAtt.term());
+		assertEquals("Position increment one token skipped", 2,
+				posAtt.getPositionIncrement());
+
+		selectFilter.incrementToken();
+		assertEquals("Selected Token 2", "positive1", tAtt.term());
+		assertEquals("Position increment next contiguous token", 1,
+				posAtt.getPositionIncrement());
+
+		selectFilter.incrementToken();
+		assertEquals("Selected Token 3", "positive2", tAtt.term());
+		assertEquals("Position increment one token skipped", 2,
+				posAtt.getPositionIncrement());
+	}
+
+	@Test
+	public void testWithoutEnablePositionIncrement() throws IOException {
+		inputTokenStream.reset();
+
+		SelectFilter selectFilter = new SelectFilter(false, inputTokenStream,
+				selectedWords, true);
+
+		TermAttribute tAtt = (TermAttribute) selectFilter
+				.addAttribute(TermAttribute.class);
+		PositionIncrementAttribute posAtt = (PositionIncrementAttribute) selectFilter
+				.addAttribute(PositionIncrementAttribute.class);
+		selectFilter.incrementToken();
+		assertEquals("Selected Token 1", "positive3", tAtt.term());
+		assertEquals("Position increment without skipping", 1,
+				posAtt.getPositionIncrement());
+
+		selectFilter.incrementToken();
+		assertEquals("Selected Token 2", "positive1", tAtt.term());
+		assertEquals("Position increment without skipping", 1,
+				posAtt.getPositionIncrement());
+
+		selectFilter.incrementToken();
+		assertEquals("Selected Token 3", "positive2", tAtt.term());
+		assertEquals("Position increment without skipping", 1,
+				posAtt.getPositionIncrement());
+	}
+}
diff --git a/Lucas/src/test/java/org/apache/uima/lucas/indexer/mapping/MappingFileReaderTest.java b/Lucas/src/test/java/org/apache/uima/lucas/indexer/mapping/MappingFileReaderTest.java
index daf1722..b5ad5fa 100644
--- a/Lucas/src/test/java/org/apache/uima/lucas/indexer/mapping/MappingFileReaderTest.java
+++ b/Lucas/src/test/java/org/apache/uima/lucas/indexer/mapping/MappingFileReaderTest.java
@@ -19,8 +19,6 @@
 
 package org.apache.uima.lucas.indexer.mapping;
 
-import static org.junit.Assert.*;
-
 import java.io.File;
 import java.io.IOException;
 import java.util.Collection;
@@ -28,13 +26,16 @@
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Properties;
-
 import javax.xml.parsers.SAXParserFactory;
 
 import org.junit.Before;
 import org.junit.Test;
 import org.xml.sax.SAXException;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
 public class MappingFileReaderTest {
 
   private static final String FACTORY_NAME = "myFactory";
@@ -53,12 +54,15 @@
   private static final String YES = "yes";
   private static final String FIELD_NAME = "annotation1";
   private static final String MAPPING_FILE = "src/test/resources/MappingFileReaderTest.xml";
+  private static final String COVER_FILE_PATH = "pathToACoverDefinitionFile";
+  private static final String GENERATE_FIELD_NAME_METHOD = "append";
   private MappingFileReader mappingFileReader;
   
   @Before
   public void setUp() throws Exception{
 	Map<String, ElementMapper<?>> elementMappers = new HashMap<String, ElementMapper<?>>();
 	elementMappers.put(MappingFileReader.ANNOTATION, new AnnotationMapper());
+	elementMappers.put(MappingFileReader.TERM_SET_COVER_DEFINITION, new TermCoverMapper());
 	elementMappers.put(MappingFileReader.FILTER, new FilterMapper());
 	elementMappers.put(MappingFileReader.FIELD, new FieldMapper());
 	elementMappers.put(MappingFileReader.FEATURE, new FeatureMapper());
@@ -87,7 +91,7 @@
 	  assertEquals(TESTFACTORY, filterDescription.getFactoryClassName());
 	  assertEquals(FACTORY_NAME, filterDescription.getName());
 	  assertTrue(filterDescription.isReuseFactory());
-	  assertEquals(29, filterDescription.getLineNumber());
+	  assertEquals(31, filterDescription.getLineNumber());
 	  assertEquals(56, filterDescription.getColumnNumber());
 	  
 	  Properties properties = filterDescription.getProperties(); 
@@ -103,7 +107,7 @@
 	  assertEquals(FEATURE_PATH, annotationDescription.getFeaturePath());
 	  assertEquals(VALUE_DELIMITER_STRING, annotationDescription.getFeatureValueDelimiterString());
 	  assertEquals(TOKENIZER, annotationDescription.getTokenizer());
-	  assertEquals(33, annotationDescription.getLineNumber());
+	  assertEquals(35, annotationDescription.getLineNumber());
 	  assertEquals(113, annotationDescription.getColumnNumber());
 
 	  filterDescriptions = annotationDescription.getFilterDescriptions();
@@ -113,7 +117,7 @@
 	  assertEquals(TESTFACTORY, filterDescription.getFactoryClassName());
 	  assertEquals(FACTORY_NAME, filterDescription.getName());
 	  assertTrue(filterDescription.isReuseFactory());
-	  assertEquals(36, filterDescription.getLineNumber());
+	  assertEquals(38, filterDescription.getLineNumber());
 	  assertEquals(58, filterDescription.getColumnNumber());
 
 	  properties = filterDescription.getProperties(); 
@@ -124,7 +128,15 @@
 	  FeatureDescription featureDescription = featureDescriptions.iterator().next();
 	  assertEquals(FEATURE_NAME, featureDescription.getFeatureName());
 	  assertEquals(NUMBER_FORMAT, featureDescription.getNumberFormat());
-	  assertEquals(39, featureDescription.getLineNumber());
+	  assertEquals(41, featureDescription.getLineNumber());
 	  assertEquals(57, featureDescription.getColumnNumber());
+	  
+	  TermCoverDescription termCoverDescription = fieldDescription.getTermCoverDescription();
+	  assertNotNull(termCoverDescription);
+	  assertEquals(COVER_FILE_PATH, termCoverDescription.getCoverDefinitionFile());
+	  assertEquals(GENERATE_FIELD_NAME_METHOD, termCoverDescription.getGenerateFieldNameMethod());
+	  assertTrue(termCoverDescription.getIgnoreCaseOfSelectedTerms());
+	  assertEquals(28, termCoverDescription.getLineNumber());
+	  assertEquals(72, termCoverDescription.getColumnNumber());
 	}
 }
diff --git a/Lucas/src/test/resources/MappingFileReaderTest.xml b/Lucas/src/test/resources/MappingFileReaderTest.xml
index 9e034e7..8207da9 100644
--- a/Lucas/src/test/resources/MappingFileReaderTest.xml
+++ b/Lucas/src/test/resources/MappingFileReaderTest.xml
@@ -24,6 +24,8 @@
 
 	<field name="annotation1" index="yes" termVector="no" stored="yes"
 		merge="true">
+		<termSetCoverDefinition coverDefinitionFile="pathToACoverDefinitionFile"
+			generateFieldNameMethod="append" ignoreCaseOfSelectedTerms="true" />
 		<filters>
 			<filter className="testfilter" factoryClassName="testfactory"
 				key="value" reuseFactory="true" name="myFactory" />
diff --git a/Lucas/src/test/resources/lucas.xsd b/Lucas/src/test/resources/lucas.xsd
index 72469df..a7b0af9 100644
--- a/Lucas/src/test/resources/lucas.xsd
+++ b/Lucas/src/test/resources/lucas.xsd
@@ -1,138 +1,156 @@
 <?xml version="1.0"?>
 
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	you under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
 
 <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
-		   elementFormDefault="qualified">
+	elementFormDefault="qualified">
 
 	<xs:simpleType name="indexType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="yes"/>
-    		<xs:enumeration value="no"/>
-    		<xs:enumeration value="no_norms"/>
-    		<xs:enumeration value="no_tf"/>
-    		<xs:enumeration value="no_norms_tf"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="yes" />
+			<xs:enumeration value="no" />
+			<xs:enumeration value="no_norms" />
+			<xs:enumeration value="no_tf" />
+			<xs:enumeration value="no_norms_tf" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="termVectorType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="no"/>
-    		<xs:enumeration value="positions"/>
-    		<xs:enumeration value="offsets"/>
-    		<xs:enumeration value="positions_offsets"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="no" />
+			<xs:enumeration value="positions" />
+			<xs:enumeration value="offsets" />
+			<xs:enumeration value="positions_offsets" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="storedType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="yes"/>
-    		<xs:enumeration value="no"/>
-    		<xs:enumeration value="compress"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="yes" />
+			<xs:enumeration value="no" />
+			<xs:enumeration value="compress" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="tokenizerType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="cas"/>
-    		<xs:enumeration value="whitespace"/>
-    		<xs:enumeration value="standard"/>
-  		</xs:restriction>
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="cas" />
+			<xs:enumeration value="whitespace" />
+			<xs:enumeration value="standard" />
+		</xs:restriction>
 	</xs:simpleType>
 
 	<xs:simpleType name="positionType">
-  		<xs:restriction base="xs:string">
-    		<xs:enumeration value="first"/>
-    		<xs:enumeration value="last"/>
-  		</xs:restriction>
-	</xs:simpleType>	
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="first" />
+			<xs:enumeration value="last" />
+		</xs:restriction>
+	</xs:simpleType>
 	
-	<xs:element name="feature">
+	<xs:simpleType name="generateCoverFieldNameType">
+		<xs:restriction base="xs:string">
+			<xs:enumeration value="append" />
+			<xs:enumeration value="prepend" />
+			<xs:enumeration value="replace" />
+		</xs:restriction>
+	</xs:simpleType>
+
+	<xs:element name="filters">
 		<xs:complexType>
-			<xs:attribute name="name" type="xs:string"/>
-			<xs:attribute name="uppercase" type="xs:boolean"/>
-			<xs:attribute name="lowercase" type="xs:boolean"/>
-			<xs:attribute name="numberFormat" type="xs:string"/>
+			<xs:sequence>
+				<xs:element ref="filter" minOccurs="1" maxOccurs="unbounded" />
+			</xs:sequence>
 		</xs:complexType>
 	</xs:element>
-	
+
+	<xs:element name="filter">
+		<xs:complexType>
+			<xs:attribute name="className" type="xs:string" />
+			<xs:attribute name="factoryClassName" type="xs:string" />
+			<xs:attribute name="reuseFactory" type="xs:boolean" />
+			<xs:attribute name="name" type="xs:string" />
+			<xs:anyAttribute processContents="lax" />
+		</xs:complexType>
+	</xs:element>
+
+	<xs:element name="features">
+		<xs:complexType>
+			<xs:sequence>
+				<xs:element ref="feature" minOccurs="1" maxOccurs="unbounded" />
+			</xs:sequence>
+		</xs:complexType>
+	</xs:element>
+
+	<xs:element name="feature">
+		<xs:complexType>
+			<xs:attribute name="name" type="xs:string" />
+			<xs:attribute name="numberFormat" type="xs:string" />
+		</xs:complexType>
+	</xs:element>
+
+	<xs:element name="annotations">
+		<xs:complexType>
+			<xs:sequence>
+				<xs:element ref="annotation" minOccurs="1" maxOccurs="unbounded" />
+			</xs:sequence>
+		</xs:complexType>
+	</xs:element>
+
 	<xs:element name="annotation">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="feature" minOccurs="0" maxOccurs="unbounded"/>
-			</xs:sequence>				
-			<xs:attribute name="type" type="xs:string"/>
-			<xs:attribute name="sofa" type="xs:string"/>
-			<xs:attribute name="featurePath" type="xs:string"/>
-			<xs:attribute name="concatString" type="xs:string"/>
-			<xs:attribute name="splittString" type="xs:string"/>
-			<xs:attribute name="prefix" type="xs:string"/>
-			<xs:attribute name="uppercase" type="xs:boolean"/>
-			<xs:attribute name="lowercase" type="xs:boolean"/>
-			<xs:attribute name="stopwordRemove" type="xs:boolean"/>
-			<xs:attribute name="position" type="positionType"/>
-			<xs:attribute name="addHypernyms" type="xs:boolean"/>
-			<xs:attribute name="mappingFile" type="xs:string"/>
-			<xs:attribute name="snowballFilter" type="xs:string"/>
-			<xs:attribute name="unique" type="xs:boolean"/>
-			<xs:attribute name="tokenizer" type="tokenizerType"/>			
+				<xs:element ref="filters" minOccurs="0" maxOccurs="1" />
+				<xs:element ref="features" minOccurs="0" maxOccurs="1" />
+			</xs:sequence>
+			<xs:attribute name="type" type="xs:string" />
+			<xs:attribute name="sofa" type="xs:string" />
+			<xs:attribute name="featurePath" type="xs:string" />
+			<xs:attribute name="tokenizer" type="tokenizerType" />
+			<xs:attribute name="featureValueDelimiterString" type="xs:string" />
+		</xs:complexType>
+	</xs:element>
+
+	<xs:element name="termSetCoverDefinition">
+		<xs:complexType>
+			<xs:attribute name="coverDefinitionFile" type="xs:string" />
+			<xs:attribute name="generateFieldNameMethod" type="generateCoverFieldNameType" />
+			<xs:attribute name="ignoreCaseOfSelectedTerms" type="xs:boolean" />
 		</xs:complexType>
 	</xs:element>
 
 	<xs:element name="field">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="annotation" maxOccurs="unbounded"/>
-			</xs:sequence>				
-			<xs:attribute name="name" type="xs:string"/>
-			<xs:attribute name="index" type="indexType"/>
-			<xs:attribute name="termVector" type="termVectorType"/>
-			<xs:attribute name="delimiter" type="xs:string"/>
-			<xs:attribute name="stored" type="storedType"/>
-			<xs:attribute name="merge" type="xs:boolean"/>			
+				<xs:element ref="termSetCoverDefinition" minOccurs="0"
+					maxOccurs="1" />
+				<xs:element ref="filters" minOccurs="0" maxOccurs="1" />
+				<xs:element ref="annotations" minOccurs="1" maxOccurs="1" />
+			</xs:sequence>
+			<xs:attribute name="name" type="xs:string" />
+			<xs:attribute name="index" type="indexType" />
+			<xs:attribute name="termVector" type="termVectorType" />
+			<xs:attribute name="delimiter" type="xs:string" />
+			<xs:attribute name="stored" type="storedType" />
+			<xs:attribute name="merge" type="xs:boolean" />
+			<xs:attribute name="unique" type="xs:boolean" />
+			<xs:attribute name="coverFile" type="xs:string" />
+
 		</xs:complexType>
 	</xs:element>
-	
+
 	<xs:element name="fields">
 		<xs:complexType>
 			<xs:sequence>
-				<xs:element ref="field" maxOccurs="unbounded"/>
+				<xs:element ref="field" maxOccurs="unbounded" />
 			</xs:sequence>
 		</xs:complexType>
 	</xs:element>