[UIMA-5764] concept mapper - add buffering to IO, fix up javadocs, have pom inherit from uima-wide parent so can be independently releasable, put xml resources into the jar, modify some configs to allow reading from classpath source or file system source. git-svn-id: https://svn.apache.org/repos/asf/uima/addons/trunk@1829466 13f79535-47bb-0310-9956-ffa450edef68

commit: 08144aab5832c18f72c5ace487cc7123e40e428f [log] [tgz]
author: Marshall Schor <schor@apache.org> Wed Apr 18 17:42:46 2018 +0000
committer: Marshall Schor <schor@apache.org> Wed Apr 18 17:42:46 2018 +0000
tree: cc0a1e7fb4143ce88b0d68d9163858998d6e8459
parent: 92040bbb73997a1d356c98551e6da64acef5955e [diff]
diff --git a/ConceptMapper/pom.xml b/ConceptMapper/pom.xml
index 08dcda7..de34d94 100644
--- a/ConceptMapper/pom.xml
+++ b/ConceptMapper/pom.xml

@@ -22,13 +22,13 @@
   
   <parent>
     <groupId>org.apache.uima</groupId>
-    <artifactId>uima-addons-parent</artifactId>
-    <version>2.3.2-SNAPSHOT</version>
-    <relativePath>../uima-addons-parent</relativePath>
+    <artifactId>parent-pom</artifactId>
+    <version>11</version>
+    <relativePath></relativePath>
   </parent>
   
   <artifactId>ConceptMapper</artifactId>
-  <version>2.3.2-SNAPSHOT</version>
+  <version>2.10.2-SNAPSHOT</version>
   <name>Apache UIMA Annotator: ${project.artifactId}</name>
   <description>Extracts concepts from a CAS</description>
   <url>${uimaWebsiteUrl}</url>
@@ -53,11 +53,101 @@
     </url>
   </scm>
   
+    <!-- The repositories and pluginRepositories sections are duplicated from
+       the parent pom, and add the Apache Snapshot Nexus repository
+       where UIMA snapshots are deployed.  This is needed if, for instance,
+       a project depends on some new SNAPSHOT level of a build tool
+       and the user hasn't checked out the build tooling.
+       
+       This allows maven to find the snapshots when looking for the parent of
+       this pom.  All URLs use https: Maven Central no longer serves plain http. -->
+  <repositories>
+    <repository>
+      <id>eclipsePlugins</id>
+      <name>Eclipse components</name>
+      <layout>default</layout>
+      <url>https://repo1.maven.org/eclipse</url>
+      
+      <releases>
+        <updatePolicy>never</updatePolicy>
+        <checksumPolicy>fail</checksumPolicy>
+      </releases>
+      
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+    </repository>
+
+    <!-- modify central repository access:
+         Turn on checksum checking-->
+    <repository>
+      <id>central</id>
+      <name>Maven Repository Switchboard</name>
+      <layout>default</layout>
+      <url>https://repo1.maven.org/maven2</url>
+
+      <releases>
+        <enabled>true</enabled>
+        <checksumPolicy>fail</checksumPolicy>
+        <updatePolicy>never</updatePolicy>
+      </releases>
+
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+
+    </repository>
+    
+    <repository>
+      <id>apache.snapshots</id>
+      <name>Apache Snapshot Repository</name>
+      <url>https://repository.apache.org/snapshots</url>
+      <releases>
+        <enabled>false</enabled>
+      </releases>
+    </repository>    
+    
+  </repositories>
+  
+  <pluginRepositories> <!-- https only: Maven 3.8.1+ blocks plain-http repositories by default -->
+    <pluginRepository>
+      <id>apache.snapshots.plugins</id>
+      <name>Apache Snapshot Repository - Maven plugins</name>
+      <url>https://repository.apache.org/snapshots</url>
+      <layout>default</layout>
+      <releases>
+        <enabled>false</enabled>
+      </releases>
+      <snapshots>
+        <enabled>true</enabled>
+        <checksumPolicy>fail</checksumPolicy>
+        <updatePolicy>never</updatePolicy>        
+      </snapshots>
+    </pluginRepository>
+  </pluginRepositories>
+  
   <properties>
+    <jiraVersion>ConceptMapper-2.10.2</jiraVersion>
     <uimaScmProject>${project.artifactId}</uimaScmProject>
-    <uimaDependencyVersion>2.4.0</uimaDependencyVersion>
+    <uimaDependencyVersion>2.10.2</uimaDependencyVersion>
     <pearMainDescriptor>desc/analysis_engine/primitive/ConceptMapperOffsetTokenizer.xml</pearMainDescriptor>
     <bookNameRoot>ConceptMapperAnnotatorUserGuide</bookNameRoot>
+ 
+    <!-- 
+     Configuring settings is best done through default properties that multiple plugins share.
+     Local configurations within plugins should be avoided. Where plugins do not pick up default
+     properties already, they should be injected manually into the plugins. 
+    -->    
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <maven.compiler.target>1.7</maven.compiler.target>
+    <maven.compiler.source>1.7</maven.compiler.source>
+    <maven.surefire.heap>512m</maven.surefire.heap>
+    <maven.surefire.argLine />
+    <maven.surefire.java9 />
+    
+    <jacoco.argLine />
+    <api_check_oldVersion>2.3.1</api_check_oldVersion>
+    
   </properties>
       
   <dependencies>
@@ -70,7 +160,7 @@
   </dependencies>
   
   <build>
-    <finalName>uima-an-conceptMapper</finalName>
+    
     <pluginManagement>
       <plugins>
         <plugin>
@@ -95,9 +185,18 @@
               </configuration>
             </execution>
           </executions>
-        </plugin>         
-      </plugins>
+        </plugin> 
+     
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-surefire-plugin</artifactId>
+          <version>2.20.1</version>
+          <configuration>
+            <argLine>@{jacoco.argLine} -Xmx@{maven.surefire.heap} -Xms@{maven.surefire.heap} @{maven.surefire.argLine} @{maven.surefire.java9}</argLine>
+          </configuration>
+        </plugin>
+       </plugins>
     </pluginManagement> 
-    
+       
   </build>
 </project>

diff --git a/ConceptMapper/src/docbook/ConceptMapperAnnotatorUserGuide.xml b/ConceptMapper/src/docbook/ConceptMapperAnnotatorUserGuide.xml
index 7ae4f66..9d10efe 100644
--- a/ConceptMapper/src/docbook/ConceptMapperAnnotatorUserGuide.xml
+++ b/ConceptMapper/src/docbook/ConceptMapperAnnotatorUserGuide.xml

@@ -98,7 +98,23 @@
 			The result of running ConceptMapper are UIMA annotations, and there are two configuration parameters that are used to map the attributes from the dictionary (see <xref linkend="ConceptMapper.param.attributelist"/>) to features of UIMA annotations (see <xref linkend="ConceptMapper.param.featurelist"/>).
 		</para>
 		<para>
-			The entire dictionary is loaded into memory, which, in conjunction with an efficient data structure, provides very fast lookups. As stated earlier, dictionaries with millions of entries have been used without any performance issues. The obvious drawback to storing the dictionary in memory is that large dictionaries require large amounts of memory; this is partially mitigated by the fact that the dictionary is implemented as a UIMA shared resource (see <xref linkend="ConceptMapper.res.dictionaryfile"/>). This means that multiple annotators, such as multiple instances of ConceptMapper that are set up using different parameters, can all access it without having to load it more than once. The dictionary loader is specified in the external resource section of the descriptor, and is expected to implement the interface <interfacename>org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource</interfacename>. Two implementations are included in the distribution, <classname>org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource_impl</classname>, the standard implementation, which loads an XML version of a dictionary, and <classname>org.apache.uima.conceptMapper.support.dictionaryResource.CompiledDictionaryResource_impl</classname> which loads a pre-compiled version, for faster loading. The compiler is supplied as <classname>org.apache.uima.conceptMapper.dictionaryCompiler.CompileDictionary</classname>, which takes two arguments, a ConceptMapper analysis engine descriptor that loads the dictionary using the standard dictionary loader, and the name of the output file into which to write the compiled dictionary.
+			The entire dictionary is loaded into memory, which, in conjunction with an efficient data structure, provides very fast lookups. 
+			As stated earlier, dictionaries with millions of entries have been used without any performance issues. 
+			The obvious drawback to storing the dictionary in memory is that large dictionaries require large amounts of memory; 
+			this is partially mitigated by the fact that the dictionary is implemented as a UIMA shared resource 
+			(see <xref linkend="ConceptMapper.res.dictionaryfile"/>). 
+			This means that multiple annotators, such as multiple instances of ConceptMapper that are set up using different parameters, 
+			can all access it without having to load it more than once. 
+			The dictionary loader is specified in the external resource section of the descriptor, 
+			and is expected to implement the interface <interfacename>org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource</interfacename>. 
+			Two implementations are included in the distribution, 
+			<classname>org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource_impl</classname>, 
+			the standard implementation, which loads an XML version of a dictionary, 
+			and <classname>org.apache.uima.conceptMapper.support.dictionaryResource.CompiledDictionaryResource_impl</classname> 
+			which loads a pre-compiled version, for faster loading. 
+			The compiler is supplied as <classname>org.apache.uima.conceptMapper.dictionaryCompiler.CompileDictionary</classname>, 
+			which takes two arguments, a ConceptMapper analysis engine descriptor that loads the dictionary using the standard dictionary loader, 
+			and the name of the output file into which to write the compiled dictionary.
 		</para>
 		</section>
 		<section id="tokenizer">

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/ConceptMapper.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/ConceptMapper.java
index a7930e2..3ae3eb1 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/ConceptMapper.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/ConceptMapper.java

@@ -28,13 +28,13 @@
 import java.util.Map;
 import java.util.TreeMap;
 
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.analysis_engine.ResultSpecification;
 import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
-import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
-import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.FSIndex;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.Feature;
@@ -43,15 +43,14 @@
 import org.apache.uima.cas.TypeSystem;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.cas.text.AnnotationIndex;
-import org.apache.uima.cas.CAS;
 import org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource;
-import org.apache.uima.conceptMapper.support.dictionaryResource.EntryProperties;
 import org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource.DictEntry;
+import org.apache.uima.conceptMapper.support.dictionaryResource.EntryProperties;
 import org.apache.uima.conceptMapper.support.tokens.TokenFilter;
 import org.apache.uima.conceptMapper.support.tokens.TokenNormalizer;
 import org.apache.uima.conceptMapper.support.tokens.UnknownTypeException;
-import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
 
@@ -221,7 +220,7 @@
 
   private Type spanFeatureStructureType;
 
-  private Logger logger;
+  public  Logger logger;
 
   private JCas jcas;
 
@@ -368,6 +367,8 @@
    * 
    * @param typeSystem
    *          the current type system.
+   * @throws AnnotatorConfigurationException -
+   * @throws AnnotatorInitializationException - 
    * @see org.apache.uima.analysis_engine.annotator.TextAnnotator#typeSystemInit(TypeSystem)
    */
   public void typeSystemInit(TypeSystem typeSystem) throws AnnotatorConfigurationException,
@@ -482,10 +483,9 @@
    * Perform the actual analysis. Iterate over the document content looking for any matching words
    * or phrases in the loaded dictionary and post an annotation for each match found.
    * 
-   * @param tcas
+   * @param jCas
    *          the current CAS to process.
-   * @param aResultSpec
-   *          a specification of the result annotation that should be created by this annotator
+   * @throws AnalysisEngineProcessException -
    * 
    * @see org.apache.uima.analysis_engine.annotator.TextAnnotator#process(CAS,ResultSpecification)
    */
@@ -844,10 +844,11 @@
   }
 
   /**
-   * @param searchStrategy
-   * @param tcas
-   * @param tokens
-   * @param spanAnnotation
+   * @param searchStrategy -
+   * @param findAllMatches true to find all matches
+   * @param tcas the Cas
+   * @param tokens -
+   * @param spanAnnotation -
    */
   protected void processTokenList(int searchStrategy, boolean findAllMatches, CAS tcas,
           ArrayList<AnnotationFS> tokens, Annotation spanAnnotation) {
@@ -961,10 +962,15 @@
   }
 
   /**
-   * @param start
-   * @param end
-   * @param properties
-   * @param matched
+   * @param tcas -
+   * @param start -
+   * @param end -
+   * @param properties -
+   * @param spanAnnotation -
+   * @param matchedText -
+   * @param matched -
+   * @param log -
+
    */
   protected void makeAnnotation(CAS tcas, int start, int end, EntryProperties properties,
           Annotation spanAnnotation, String matchedText, Collection<AnnotationFS> matched,

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm.java
index a6e90a3..f11db7b 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm.java

@@ -33,19 +33,28 @@
   protected DictTerm() {}
     
   /** Internal - constructor used by generator 
-   * @generated */
+   * @generated 
+   * @param addr -
+   * @param type -
+   */
   public DictTerm(int addr, TOP_Type type) {
     super(addr, type);
     readObject();
   }
   
-  /** @generated */
+  /** @generated 
+   * @param jcas -
+   */
   public DictTerm(JCas jcas) {
     super(jcas);
     readObject();   
   } 
 
-  /** @generated */  
+  /** @generated 
+   * @param jcas -
+   * @param begin -
+   * @param end -
+   */
   public DictTerm(JCas jcas, int begin, int end) {
     super(jcas);
     setBegin(begin);
@@ -65,13 +74,16 @@
   //* Feature: DictCanon
 
   /** getter for DictCanon - gets canonical form
-   * @generated */
+   * @generated
+   * @return - 
+   * */
   public String getDictCanon() {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_DictCanon == null)
       jcasType.jcas.throwFeatMissing("DictCanon", "org.apache.uima.conceptMapper.DictTerm");
     return jcasType.ll_cas.ll_getStringValue(addr, ((DictTerm_Type)jcasType).casFeatCode_DictCanon);}
     
   /** setter for DictCanon - sets canonical form 
+   * @param v -
    * @generated */
   public void setDictCanon(String v) {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_DictCanon == null)
@@ -83,13 +95,15 @@
   //* Feature: enclosingSpan
 
   /** getter for enclosingSpan - gets span that this NoTerm is contained within (i.e. its sentence)
+   * @return -
    * @generated */
   public Annotation getEnclosingSpan() {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_enclosingSpan == null)
       jcasType.jcas.throwFeatMissing("enclosingSpan", "org.apache.uima.conceptMapper.DictTerm");
     return (Annotation)(jcasType.ll_cas.ll_getFSForRef(jcasType.ll_cas.ll_getRefValue(addr, ((DictTerm_Type)jcasType).casFeatCode_enclosingSpan)));}
     
-  /** setter for enclosingSpan - sets span that this NoTerm is contained within (i.e. its sentence) 
+  /** setter for enclosingSpan - sets span that this NoTerm is contained within (i.e. its sentence)
+   * @param v -  
    * @generated */
   public void setEnclosingSpan(Annotation v) {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_enclosingSpan == null)
@@ -101,13 +115,15 @@
   //* Feature: matchedText
 
   /** getter for matchedText - gets 
+   * @return -
    * @generated */
   public String getMatchedText() {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedText == null)
       jcasType.jcas.throwFeatMissing("matchedText", "org.apache.uima.conceptMapper.DictTerm");
     return jcasType.ll_cas.ll_getStringValue(addr, ((DictTerm_Type)jcasType).casFeatCode_matchedText);}
     
-  /** setter for matchedText - sets  
+  /** setter for matchedText - sets
+   * @param v -  
    * @generated */
   public void setMatchedText(String v) {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedText == null)
@@ -118,7 +134,8 @@
   //*--------------*
   //* Feature: matchedTokens
 
-  /** getter for matchedTokens - gets 
+  /** getter for matchedTokens - gets
+   * @return - 
    * @generated */
   public FSArray getMatchedTokens() {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedTokens == null)
@@ -126,13 +143,17 @@
     return (FSArray)(jcasType.ll_cas.ll_getFSForRef(jcasType.ll_cas.ll_getRefValue(addr, ((DictTerm_Type)jcasType).casFeatCode_matchedTokens)));}
     
   /** setter for matchedTokens - sets  
-   * @generated */
+   * @generated 
+   * @param v -
+   */
   public void setMatchedTokens(FSArray v) {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedTokens == null)
       jcasType.jcas.throwFeatMissing("matchedTokens", "org.apache.uima.conceptMapper.DictTerm");
     jcasType.ll_cas.ll_setRefValue(addr, ((DictTerm_Type)jcasType).casFeatCode_matchedTokens, jcasType.ll_cas.ll_getFSRef(v));}    
     
   /** indexed getter for matchedTokens - gets an indexed value - 
+   * @param i the index 
+   * @return - 
    * @generated */
   public TOP getMatchedTokens(int i) {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedTokens == null)
@@ -140,7 +161,9 @@
     jcasType.jcas.checkArrayBounds(jcasType.ll_cas.ll_getRefValue(addr, ((DictTerm_Type)jcasType).casFeatCode_matchedTokens), i);
     return (TOP)(jcasType.ll_cas.ll_getFSForRef(jcasType.ll_cas.ll_getRefArrayValue(jcasType.ll_cas.ll_getRefValue(addr, ((DictTerm_Type)jcasType).casFeatCode_matchedTokens), i)));}
 
-  /** indexed setter for matchedTokens - sets an indexed value - 
+  /** indexed setter for matchedTokens - sets an indexed value -
+   * @param i the index
+   * @param v the value to set 
    * @generated */
   public void setMatchedTokens(int i, TOP v) { 
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedTokens == null)

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm_Type.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm_Type.java
index 6a2a1b5..6a03506 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm_Type.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm_Type.java

@@ -46,13 +46,19 @@
   final Feature casFeat_DictCanon;
   /** @generated */
   final int     casFeatCode_DictCanon;
-  /** @generated */ 
+  /** @generated
+   * @param addr -
+   * @return -
+   */
   public String getDictCanon(int addr) {
         if (featOkTst && casFeat_DictCanon == null)
       jcas.throwFeatMissing("DictCanon", "org.apache.uima.conceptMapper.DictTerm");
     return ll_cas.ll_getStringValue(addr, casFeatCode_DictCanon);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */
   public void setDictCanon(int addr, String v) {
         if (featOkTst && casFeat_DictCanon == null)
       jcas.throwFeatMissing("DictCanon", "org.apache.uima.conceptMapper.DictTerm");
@@ -64,13 +70,19 @@
   final Feature casFeat_enclosingSpan;
   /** @generated */
   final int     casFeatCode_enclosingSpan;
-  /** @generated */ 
+  /** @generated
+   * @param addr - 
+   * @return - 
+   */  
   public int getEnclosingSpan(int addr) {
         if (featOkTst && casFeat_enclosingSpan == null)
       jcas.throwFeatMissing("enclosingSpan", "org.apache.uima.conceptMapper.DictTerm");
     return ll_cas.ll_getRefValue(addr, casFeatCode_enclosingSpan);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */    
   public void setEnclosingSpan(int addr, int v) {
         if (featOkTst && casFeat_enclosingSpan == null)
       jcas.throwFeatMissing("enclosingSpan", "org.apache.uima.conceptMapper.DictTerm");
@@ -82,13 +94,19 @@
   final Feature casFeat_matchedText;
   /** @generated */
   final int     casFeatCode_matchedText;
-  /** @generated */ 
+  /** @generated
+   * @param addr -
+   * @return - 
+   */ 
   public String getMatchedText(int addr) {
         if (featOkTst && casFeat_matchedText == null)
       jcas.throwFeatMissing("matchedText", "org.apache.uima.conceptMapper.DictTerm");
     return ll_cas.ll_getStringValue(addr, casFeatCode_matchedText);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */    
   public void setMatchedText(int addr, String v) {
         if (featOkTst && casFeat_matchedText == null)
       jcas.throwFeatMissing("matchedText", "org.apache.uima.conceptMapper.DictTerm");
@@ -100,19 +118,29 @@
   final Feature casFeat_matchedTokens;
   /** @generated */
   final int     casFeatCode_matchedTokens;
-  /** @generated */ 
+  /** @generated 
+   * @param addr -
+   * @return -
+   */
   public int getMatchedTokens(int addr) {
         if (featOkTst && casFeat_matchedTokens == null)
       jcas.throwFeatMissing("matchedTokens", "org.apache.uima.conceptMapper.DictTerm");
     return ll_cas.ll_getRefValue(addr, casFeatCode_matchedTokens);
   }
-  /** @generated */    
+  /** @generated
+   * @param addr -
+   * @param v -
+   */
   public void setMatchedTokens(int addr, int v) {
         if (featOkTst && casFeat_matchedTokens == null)
       jcas.throwFeatMissing("matchedTokens", "org.apache.uima.conceptMapper.DictTerm");
     ll_cas.ll_setRefValue(addr, casFeatCode_matchedTokens, v);}
     
-   /** @generated */
+   /** @generated 
+    * @param addr -
+    * @param i  the index
+    * @return -
+    */
   public int getMatchedTokens(int addr, int i) {
         if (featOkTst && casFeat_matchedTokens == null)
       jcas.throwFeatMissing("matchedTokens", "org.apache.uima.conceptMapper.DictTerm");
@@ -122,7 +150,12 @@
   return ll_cas.ll_getRefArrayValue(ll_cas.ll_getRefValue(addr, casFeatCode_matchedTokens), i);
   }
    
-  /** @generated */ 
+  /** @generated 
+   *  
+   * @param addr -
+   * @param i -
+   * @param v -
+   */
   public void setMatchedTokens(int addr, int i, int v) {
         if (featOkTst && casFeat_matchedTokens == null)
       jcas.throwFeatMissing("matchedTokens", "org.apache.uima.conceptMapper.DictTerm");
@@ -136,7 +169,10 @@
 
 
   /** initialize variables to correspond with Cas Type and Features
-	* @generated */
+	* @generated
+   * @param jcas -
+   * @param casType -
+   */
   public DictTerm_Type(JCas jcas, Type casType) {
     super(jcas, casType);
     casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/Logger.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/Logger.java
index bfc4019..d77c967 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/Logger.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/Logger.java

@@ -47,6 +47,10 @@
     }
   }
 
+  public void logConfig(String message) {
+    log(Level.CONFIG, message);
+  }
+  
   public void logError(String message) {
     log(Level.SEVERE, message);
   }
@@ -66,4 +70,8 @@
   public void logFinest(String message) {
     log(Level.FINEST, message);
   }
+  
+  public boolean isLoggable(Level level) {
+    return logger.isLoggable(level);
+  }
 }

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/dictionaryCompiler/CompileDictionary.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/dictionaryCompiler/CompileDictionary.java
index 299beef..7567ca7 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/dictionaryCompiler/CompileDictionary.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/dictionaryCompiler/CompileDictionary.java

@@ -18,7 +18,9 @@
  */
 package org.apache.uima.conceptMapper.dictionaryCompiler;
 
+import java.io.BufferedOutputStream;
 import java.io.FileOutputStream;
+import java.io.OutputStream;
 
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -45,7 +47,7 @@
     DictionaryResource_impl dict = (DictionaryResource_impl) ae.getResourceManager().getResource(
     		dictionaryResourceName);
 
-    FileOutputStream output = new FileOutputStream(args[1]);
+    OutputStream output = new BufferedOutputStream(new FileOutputStream(args[1]));
     dict.serializeEntries(output);
     output.close();
     ae.destroy();

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/CompiledDictionaryResource_impl.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/CompiledDictionaryResource_impl.java
index f7af29c..1b36b51 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/CompiledDictionaryResource_impl.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/CompiledDictionaryResource_impl.java

@@ -18,7 +18,9 @@
  */
 package org.apache.uima.conceptMapper.support.dictionaryResource;
 
+import java.io.BufferedInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.ObjectInputStream;
 import java.util.Enumeration;
 import java.util.Hashtable;
@@ -33,6 +35,7 @@
  */
 
 public class CompiledDictionaryResource_impl implements DictionaryResource, SharedResourceObject {
+  
   /**
    * Hashtable of first words. Contains a DictEntries object keyed on word string for the first word
    * of every entry in the specified dictionary.
@@ -43,6 +46,8 @@
   public DictionaryResource newDictionaryResource(int initialSize) {
     throw new UnsupportedOperationException();
   }
+  
+  
 
   public DictEntriesByLength getEntries(String key) {
     return dictImpl.get(key);
@@ -59,10 +64,12 @@
   @SuppressWarnings("unchecked")
   public void load(DataResource data) throws ResourceInitializationException {
     try {
-      ObjectInputStream ois = new ObjectInputStream(data.getInputStream());
+      InputStream iStream = data.getInputStream();
+      BufferedInputStream bis = new BufferedInputStream(iStream);
+      ObjectInputStream ois = new ObjectInputStream(bis);
       entryPropertiesRoot = (EntryPropertiesRoot) ois.readObject();
       dictImpl = (Hashtable) ois.readObject();
-      ois.close();
+      ois.close();          
     } catch (IOException e) {
       throw new ResourceInitializationException(e);
     } catch (ClassNotFoundException e) {

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource.java
index 5b38346..35c0b72 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource.java

@@ -53,9 +53,9 @@
   public interface DictEntries extends Serializable {
 
     /**
-     * @param elements
-     * @param unsorted
-     * @param props
+     * @param elements -
+     * @param unsorted -
+     * @param props -
      */
     void putEntry(String[] elements, String unsorted, EntryProperties props);
 
@@ -87,7 +87,7 @@
   /**
    * return data structure containing a list of dictionary entries, sorted by number of tokens
    * 
-   * @param key
+   * @param key -
    * @return data structure containing a list of dictionary entries, sorted by number of tokens
    */
   public DictEntriesByLength getEntries(String key);
@@ -113,13 +113,13 @@
   public String toString();
 
   /**
-   * @param context
-   * @param logger
-   * @param tokenAnnotationName
-   * @param tokenTypeFeatureName
-   * @param tokenClassFeatureName
-   * @param tokenizerDescriptor
-   * @throws ResourceInitializationException
+   * @param context -
+   * @param logger -
+   * @param tokenAnnotationName -
+   * @param tokenTypeFeatureName -
+   * @param tokenClassFeatureName -
+   * @param tokenizerDescriptor -
+   * @throws ResourceInitializationException -
    */
   public void loadDictionaryContents(UimaContext context, Logger logger,
           String tokenAnnotationName, String tokenTypeFeatureName, String tokenClassFeatureName,

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource_impl.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource_impl.java
index e5d93f6..f0780b5 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource_impl.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource_impl.java

@@ -18,11 +18,16 @@
  */
 package org.apache.uima.conceptMapper.support.dictionaryResource;
 
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.ObjectOutputStream;
+import java.io.OutputStream;
 import java.io.Serializable;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
@@ -60,6 +65,24 @@
  */
 
 public class DictionaryResource_impl implements DictionaryResource, SharedResourceObject {
+  
+  /** 
+   * support making a compiled dictionary as a side effect of loading
+   *   - trigger: the existence of a file path in the property 
+   *       uima.conceptmapper.compiled_dictionary_directory
+   *   - this is interpreted as a path to a writable directory where
+   *     the compiled version of this is written after loading
+   *     
+   */
+  
+  public final static String SAVE_COMPILED = "uima.conceptmapper.compiled_dictionary_directory";
+  
+  private final static File compDictDir;
+  static {
+    String p = System.getProperty(SAVE_COMPILED);
+    compDictDir = (p == null) ? null : new File(p);
+  }
+  
 /** Dictionary file loader. Uses an XML parser. */
   protected DictLoader dictLoader;
 
@@ -170,7 +193,7 @@
    * @param unsorted
    * 		  an unsorted string representation of the entry, if the contents of 'elements' has been sorted
    * @param length
-   *          the number of words in the phrase (>=1)
+   *          the number of words in the phrase (&gt;=1)
    * @param props
    *          the EntryProperties object for the dictionary entry
    */
@@ -271,11 +294,36 @@
       // System.out.print ("Loading Dictionary: '" + dictLoader.dataResource.getUri().toString() +
       // "'...");
       // System.out.print ("Loading Dictionary...");
-      logger.logInfo("Loading Dictionary...");
+      URL dictUrl = dictLoader.dataResource.getUrl();
+      String loadPath = (null != dictUrl) 
+                          ? (" from " + dictLoader.dataResource.getUrl().toString())
+                          : "";
+      logger.logInfo("Loading Dictionary" + loadPath);
       dictLoader.setDictionary(dictStream, NumOfInitialDictEntries, tokenAnnotationName,
               tokenTypeFeatureName, tokenClassFeatureName, tokenizerDescriptor, tokenFilter,
               tokenNormalizer, langID, entryPropertiesRoot);
-      logger.logInfo("...done");
+      logger.logInfo("...done loading dictionary" + loadPath);
+      
+      do { // to establish break boundary
+        if (compDictDir != null) {
+          if (!compDictDir.exists()) {
+            if (!compDictDir.mkdirs()) {
+              logger.logError("Cannot create compiled dictionary output directory: " + compDictDir.toString());
+              break;
+            }
+          }
+          
+          // get the last part of the file source name - this will be the name of the compiled dictionary
+          String n = dictUrl.getPath();
+          int i = n.lastIndexOf(File.separator);
+          n = (i >= 0) ? n.substring(i + 1) : n;
+          
+          BufferedOutputStream output = new BufferedOutputStream(new FileOutputStream(new File(compDictDir, n)));
+          serializeEntries(output);
+          output.close();
+        }
+      } while (false);  // of do establishing break boundary
+      
       // System.out.println ("done");
       // System.err.println("NEW DICT:\n" + toString());
       setLoaded(true);
@@ -458,9 +506,9 @@
     EntryProperties properties;
 
     /**
-     * @param elements
-     * @param unsorted
-     * @param properties
+     * @param elements -
+     * @param unsorted -
+     * @param properties -
      */
     public DictEntryImpl(String[] elements, String unsorted,
             EntryProperties properties) {
@@ -954,7 +1002,7 @@
     }
 
     public InputStream getInputStream() throws IOException {
-      return dataResource.getInputStream();
+      return new BufferedInputStream(dataResource.getInputStream());
     }
 
   }
@@ -1006,7 +1054,7 @@
     return result.toString();
   }
 
-  public void serializeEntries(FileOutputStream output) throws IOException {
+  public void serializeEntries(OutputStream output) throws IOException {
     ObjectOutputStream oos = new ObjectOutputStream(output);
     oos.writeObject(this.entryPropertiesRoot);
     oos.writeObject(this.dictImpl);

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/EntryProperties.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/EntryProperties.java
index f01201d..8bdd6fd 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/EntryProperties.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/EntryProperties.java

@@ -31,9 +31,9 @@
 
 	/**
 	 * 
-	 * @param root
-	 * @param maxNumberOfProperties
-	 * @throws NullPointerException
+	 * @param root -
+	 * @param maxNumberOfProperties -
+	 * @throws NullPointerException -
 	 * 
 	 * should only be called by factory
 	 */

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/annotatorAdaptor/AnnotatorAdaptor.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/annotatorAdaptor/AnnotatorAdaptor.java
index b3d0976..9fe3056 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/annotatorAdaptor/AnnotatorAdaptor.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/annotatorAdaptor/AnnotatorAdaptor.java

@@ -18,7 +18,9 @@
  */
 package org.apache.uima.conceptMapper.support.dictionaryResource.annotatorAdaptor;
 
+import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.MalformedURLException;
 import java.util.Vector;
 
@@ -40,6 +42,7 @@
 import org.apache.uima.resource.ResourceManager;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.InvalidXMLException;
+import org.apache.uima.util.Level;
 import org.apache.uima.util.XMLInputSource;
 
 public class AnnotatorAdaptor {
@@ -71,15 +74,28 @@
           throws DictionaryLoaderException {
     super();
     try {
-      aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier(
-              new XMLInputSource(analysisEngineDescriptorPath));
+      this.logger = logger;
+//      aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier(
+//              new XMLInputSource(analysisEngineDescriptorPath));
+      
+      XMLInputSource descriptorSource = null;
+      if (new File(analysisEngineDescriptorPath).exists()) {
+        logger.logConfig("Loading the analysisEngineDescriptorPath from file system path: "+ analysisEngineDescriptorPath);
+        descriptorSource = new XMLInputSource(analysisEngineDescriptorPath);
+      } else {
+        logger.logConfig("Loading the analysisEngineDescriptorPath from class path: "+ analysisEngineDescriptorPath);
+        InputStream is = this.getClass().getResourceAsStream(analysisEngineDescriptorPath);
+        descriptorSource = new XMLInputSource(is, null);
+      }
+      aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier(descriptorSource);
+      
       this.tokenTypeName = tokenTypeName;
       this.tokenTypeFeature = tokenFilter.getTokenTypeFeature();
       this.tokenClassFeature = tokenFilter.getTokenClassFeature();
       this.tokenFilter = tokenFilter;
       this.langID = langID;
       this.result = result;
-      this.logger = logger;
+
     } catch (InvalidXMLException e) {
       throw new DictionaryLoaderException(e);
     } catch (IOException e) {

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/stemmer/Stemmer.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/stemmer/Stemmer.java
index ea0554c..6b3a20f 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/stemmer/Stemmer.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/stemmer/Stemmer.java

@@ -44,6 +44,9 @@
 
   /**
    * Initialize the stemmer with a dictionary
+   * @param dictionary -
+   * @throws FileNotFoundException -
+   * @throws ParseException -
    */
   public void initialize(String dictionary) throws FileNotFoundException, ParseException;
 

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/OffsetTokenizer.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/OffsetTokenizer.java
index 3d5a91d..db9de26 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/OffsetTokenizer.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/OffsetTokenizer.java

@@ -137,6 +137,7 @@
    * {@link #nextToken(JCas) nextToken} will return the first token from the input string
    * as a TokenAnnotation; you can get the text by using
    * {@link TokenAnnotation#getText()}
+   * @param text -
    */
   public void setText(String text) {
     this.text = text;
@@ -374,8 +375,6 @@
    * 
    * @param jcas
    *          the current CAS to process.
-   * @param aResultSpec
-   *          a specification of the result annotation that should be created by this annotator
    * 
    * @see org.apache.uima.analysis_engine.annotator.JTextAnnotator#process(JCas, ResultSpecification)
    */
@@ -400,9 +399,9 @@
   }
 
   /**
-   * @param jcas
-   * @param documentText
-   * @param delimiters
+   * @param jcas -
+   * @param documentText -
+   * @param delimiters -
    */
   protected void doTokenization(JCas jcas, String documentText, String delimiters) {
 
@@ -421,8 +420,8 @@
   }
 
   /**
-   * @param configParameterName
-   * @param configParameterValue
+   * @param configParameterName -
+   * @param configParameterValue -
    */
   public void processConfigurationParameter(String configParameterName, Object configParameterValue) {
     if (configParameterName.equals(PARAM_CASE_MATCH)) {

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation.java
index 22fc03b..c7b78f7 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation.java

@@ -30,19 +30,28 @@
   protected TokenAnnotation() {}
     
   /** Internal - constructor used by generator 
-   * @generated */
+   * @generated 
+   * @param addr -
+   * @param type -
+   */
   public TokenAnnotation(int addr, TOP_Type type) {
     super(addr, type);
     readObject();
   }
   
-  /** @generated */
+  /** @generated 
+   * @param jcas -
+   */
   public TokenAnnotation(JCas jcas) {
     super(jcas);
     readObject();   
   } 
 
-  /** @generated */  
+  /** @generated 
+   * @param jcas -
+   * @param begin -
+   * @param end -
+   */
   public TokenAnnotation(JCas jcas, int begin, int end) {
     super(jcas);
     setBegin(begin);
@@ -62,14 +71,18 @@
   //* Feature: text
 
   /** getter for text - gets text of token
-   * @generated */
+   * @generated 
+   * @return -
+   */
   public String getText() {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_text == null)
       jcasType.jcas.throwFeatMissing("text", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return jcasType.ll_cas.ll_getStringValue(addr, ((TokenAnnotation_Type)jcasType).casFeatCode_text);}
     
   /** setter for text - sets text of token 
-   * @generated */
+   * @generated 
+   * @param v -
+   */
   public void setText(String v) {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_text == null)
       jcasType.jcas.throwFeatMissing("text", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
@@ -80,14 +93,18 @@
   //* Feature: tokenType
 
   /** getter for tokenType - gets 
-   * @generated */
+   * @generated 
+   * @return -
+   */
   public int getTokenType() {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_tokenType == null)
       jcasType.jcas.throwFeatMissing("tokenType", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return jcasType.ll_cas.ll_getIntValue(addr, ((TokenAnnotation_Type)jcasType).casFeatCode_tokenType);}
     
   /** setter for tokenType - sets  
-   * @generated */
+   * @generated 
+   * @param v -
+   */
   public void setTokenType(int v) {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_tokenType == null)
       jcasType.jcas.throwFeatMissing("tokenType", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
@@ -98,14 +115,18 @@
   //* Feature: tokenClass
 
   /** getter for tokenClass - gets semantic class, or other such classification of this token
-   * @generated */
+   * @generated 
+   * @return -
+   */
   public String getTokenClass() {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_tokenClass == null)
       jcasType.jcas.throwFeatMissing("tokenClass", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return jcasType.ll_cas.ll_getStringValue(addr, ((TokenAnnotation_Type)jcasType).casFeatCode_tokenClass);}
     
   /** setter for tokenClass - sets semantic class, or other such classification of this token 
-   * @generated */
+   * @generated 
+   * @param v -
+   */
   public void setTokenClass(String v) {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_tokenClass == null)
       jcasType.jcas.throwFeatMissing("tokenClass", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation_Type.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation_Type.java
index d78fd23..2f9363b 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation_Type.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation_Type.java

@@ -44,13 +44,19 @@
   final Feature casFeat_text;
   /** @generated */
   final int     casFeatCode_text;
-  /** @generated */ 
+  /** @generated 
+   * @param addr -
+   * @return -
+   */
   public String getText(int addr) {
         if (featOkTst && casFeat_text == null)
       jcas.throwFeatMissing("text", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return ll_cas.ll_getStringValue(addr, casFeatCode_text);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */
   public void setText(int addr, String v) {
         if (featOkTst && casFeat_text == null)
       jcas.throwFeatMissing("text", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
@@ -62,13 +68,19 @@
   final Feature casFeat_tokenType;
   /** @generated */
   final int     casFeatCode_tokenType;
-  /** @generated */ 
+  /** @generated 
+   * @param addr -
+   * @return -
+   */
   public int getTokenType(int addr) {
         if (featOkTst && casFeat_tokenType == null)
       jcas.throwFeatMissing("tokenType", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return ll_cas.ll_getIntValue(addr, casFeatCode_tokenType);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */
   public void setTokenType(int addr, int v) {
         if (featOkTst && casFeat_tokenType == null)
       jcas.throwFeatMissing("tokenType", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
@@ -80,13 +92,19 @@
   final Feature casFeat_tokenClass;
   /** @generated */
   final int     casFeatCode_tokenClass;
-  /** @generated */ 
+  /** @generated 
+   * @param addr -
+   * @return -
+   */
   public String getTokenClass(int addr) {
         if (featOkTst && casFeat_tokenClass == null)
       jcas.throwFeatMissing("tokenClass", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return ll_cas.ll_getStringValue(addr, casFeatCode_tokenClass);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */
   public void setTokenClass(int addr, String v) {
         if (featOkTst && casFeat_tokenClass == null)
       jcas.throwFeatMissing("tokenClass", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
@@ -97,7 +115,10 @@
 
 
   /** initialize variables to correspond with Cas Type and Features
-	* @generated */
+	* @generated 
+   * @param jcas -
+   * @param casType -
+   */
   public TokenAnnotation_Type(JCas jcas, Type casType) {
     super(jcas, casType);
     casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenFilter.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenFilter.java
index 345e97e..232c052 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenFilter.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenFilter.java

@@ -270,7 +270,7 @@
   }
 
   /**
-   * @param token
+   * @param token -
    * @return false if tokenTypeFeature is set, and the token's tokenTypeFeature slot is set, but the value is not OK
    */
   public boolean checkTokenType(AnnotationFS token) {
@@ -318,12 +318,12 @@
   }
 
   /**
-   * @param typeSystem
+   * @param typeSystem -
    * @param requireFeatureExistence -
    *          if true, if the tokenType and/or tokenClass features of the tokenAnnotation are
    *          specified, they must exist. This is to allow for the situation where these features
    *          might not exist during dictionary loading, but are needed at annotator runtime
-   * @throws UnknownTypeException
+   * @throws UnknownTypeException -
    */
   public void initTypes(TypeSystem typeSystem, boolean requireFeatureExistence)
           throws UnknownTypeException {

diff --git a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenNormalizer.java b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenNormalizer.java
index 7a0a5eb..eaed1d3 100644
--- a/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenNormalizer.java
+++ b/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenNormalizer.java

@@ -66,9 +66,9 @@
   private boolean replaceCommaWithAND;
 
   /**
-   * @param annotatorContext
-   * @param logger
-   * @throws AnnotatorContextException
+   * @param uimaContext -
+   * @param logger -
+   * @throws AnnotatorContextException -
    */
   public TokenNormalizer(UimaContext uimaContext, Logger logger)
           throws AnnotatorContextException {

diff --git a/ConceptMapper/src/main/java/uima/tt/TokenAnnotation.java b/ConceptMapper/src/main/java/uima/tt/TokenAnnotation.java
index ed9b788..2901af7 100644
--- a/ConceptMapper/src/main/java/uima/tt/TokenAnnotation.java
+++ b/ConceptMapper/src/main/java/uima/tt/TokenAnnotation.java

@@ -31,19 +31,28 @@
   protected TokenAnnotation() {}
     
   /** Internal - constructor used by generator 
-   * @generated */
+   * @generated 
+   * @param addr -
+   * @param type -
+   */
   public TokenAnnotation(int addr, TOP_Type type) {
     super(addr, type);
     readObject();
   }
   
-  /** @generated */
+  /** @generated 
+   * @param jcas -
+   */
   public TokenAnnotation(JCas jcas) {
     super(jcas);
     readObject();   
   } 
 
-  /** @generated */  
+  /** @generated 
+   * @param jcas -
+   * @param begin -
+   * @param end -
+   */
   public TokenAnnotation(JCas jcas, int begin, int end) {
     super(jcas);
     setBegin(begin);

diff --git a/ConceptMapper/src/main/java/uima/tt/TokenAnnotation_Type.java b/ConceptMapper/src/main/java/uima/tt/TokenAnnotation_Type.java
index eb434cb..4efe414 100644
--- a/ConceptMapper/src/main/java/uima/tt/TokenAnnotation_Type.java
+++ b/ConceptMapper/src/main/java/uima/tt/TokenAnnotation_Type.java

@@ -42,7 +42,10 @@
 
 
   /** initialize variables to correspond with Cas Type and Features
-	* @generated */
+	* @generated
+   * @param jcas -
+   * @param casType -
+   */
   public TokenAnnotation_Type(JCas jcas, Type casType) {
     super(jcas, casType);
     casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());

diff --git a/ConceptMapper/src/main/resources/analysis_engine/aggregate/OffsetTokenizerMatcher.xml b/ConceptMapper/src/main/resources/analysis_engine/aggregate/OffsetTokenizerMatcher.xml
new file mode 100644
index 0000000..c0167f8
--- /dev/null
+++ b/ConceptMapper/src/main/resources/analysis_engine/aggregate/OffsetTokenizerMatcher.xml

@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>

+<!--

+  Licensed to the Apache Software Foundation (ASF) under one

+  or more contributor license agreements.  See the NOTICE file

+  distributed with this work for additional information

+  regarding copyright ownership.  The ASF licenses this file

+  to you under the Apache License, Version 2.0 (the

+  "License"); you may not use this file except in compliance

+  with the License.  You may obtain a copy of the License at

+  

+  http://www.apache.org/licenses/LICENSE-2.0

+  

+  Unless required by applicable law or agreed to in writing,

+  software distributed under the License is distributed on an

+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

+  KIND, either express or implied.  See the License for the

+  specific language governing permissions and limitations

+  under the License.    

+-->  

+<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">

+	<frameworkImplementation>org.apache.uima.java</frameworkImplementation>

+	<primitive>false</primitive>

+	<delegateAnalysisEngineSpecifiers>

+		<delegateAnalysisEngine key="ConceptMapper">

+			<import location="../primitive/ConceptMapperOffsetTokenizer.xml" />

+		</delegateAnalysisEngine>

+		<delegateAnalysisEngine key="Tokenizer">

+			<import location="../primitive/OffsetTokenizer.xml" />

+		</delegateAnalysisEngine>

+	</delegateAnalysisEngineSpecifiers>

+	<analysisEngineMetaData>

+		<name>DictMatcher</name>

+		<configurationParameters />

+		<configurationParameterSettings />

+		<flowConstraints>

+			<fixedFlow>

+				<node>Tokenizer</node>

+				<node>ConceptMapper</node>

+			</fixedFlow>

+		</flowConstraints>

+		<fsIndexCollection />

+		<capabilities>

+			<capability>

+				<inputs />

+				<outputs>

+					<type allAnnotatorFeatures="true">uima.tt.TokenAnnotation</type>

+					<type allAnnotatorFeatures="true">uima.tt.SentenceAnnotation</type>

+					<type allAnnotatorFeatures="true">uima.tt.ParagraphAnnotation</type>

+					<type allAnnotatorFeatures="true">org.apache.uima.conceptMapper.DictTerm</type>

+				</outputs>

+				<languagesSupported />

+			</capability>

+		</capabilities>

+		<operationalProperties>

+			<modifiesCas>true</modifiesCas>

+			<multipleDeploymentAllowed>true</multipleDeploymentAllowed>

+		</operationalProperties>

+	</analysisEngineMetaData>

+</taeDescription>


diff --git a/ConceptMapper/src/main/resources/analysis_engine/primitive/ConceptMapperOffsetTokenizer.xml b/ConceptMapper/src/main/resources/analysis_engine/primitive/ConceptMapperOffsetTokenizer.xml
new file mode 100644
index 0000000..c66df5c
--- /dev/null
+++ b/ConceptMapper/src/main/resources/analysis_engine/primitive/ConceptMapperOffsetTokenizer.xml

@@ -0,0 +1,513 @@
+<?xml version="1.0" encoding="UTF-8"?>

+<!--

+  Licensed to the Apache Software Foundation (ASF) under one

+  or more contributor license agreements.  See the NOTICE file

+  distributed with this work for additional information

+  regarding copyright ownership.  The ASF licenses this file

+  to you under the Apache License, Version 2.0 (the

+  "License"); you may not use this file except in compliance

+  with the License.  You may obtain a copy of the License at

+  

+  http://www.apache.org/licenses/LICENSE-2.0

+  

+  Unless required by applicable law or agreed to in writing,

+  software distributed under the License is distributed on an

+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

+  KIND, either express or implied.  See the License for the

+  specific language governing permissions and limitations

+  under the License.    

+-->  

+<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">

+	<frameworkImplementation>org.apache.uima.java</frameworkImplementation>

+	<primitive>true</primitive>

+	<annotatorImplementationName>org.apache.uima.conceptMapper.ConceptMapper</annotatorImplementationName>

+	<analysisEngineMetaData>

+		<name>ConceptMapper</name>

+		<description></description>

+		<version>1</version>

+		<vendor></vendor>

+		<configurationParameters>

+			<configurationParameter>

+				<name>caseMatch</name>

+				<description>

+					this parameter specifies the case folding mode:

+					ignoreall - fold everything to lowercase for

+					matching insensitive - fold only tokens with initial

+					caps to lowercase digitfold - fold all (and only)

+					tokens with a digit sensitive - perform no case

+					folding

+				</description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>true</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>Stemmer</name>

+				<description>

+					Name of stemmer class to use before matching. MUST

+					have a zero-parameter constructor! If not specified,

+					no stemming will be performed.

+				</description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>ResultingAnnotationName</name>

+				<description>

+					Name of the annotation type created by this TAE,

+					must match the typeSystemDescription entry

+				</description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>true</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>ResultingEnclosingSpanName</name>

+				<description>

+					Name of the feature in the resultingAnnotation to

+					contain the span that encloses it (i.e. its

+					sentence)

+				</description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>AttributeList</name>

+				<description>

+					List of attribute names for XML dictionary entry

+					record - must correspond to FeatureList

+				</description>

+				<type>String</type>

+				<multiValued>true</multiValued>

+				<mandatory>true</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>FeatureList</name>

+				<description>

+					List of feature names for CAS annotation - must

+					correspond to AttributeList

+				</description>

+				<type>String</type>

+				<multiValued>true</multiValued>

+				<mandatory>true</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>TokenAnnotation</name>

+				<description></description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>true</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>TokenClassFeatureName</name>

+				<description>

+					Name of feature used when doing lookups against

+					IncludedTokenClasses and ExcludedTokenClasses

+				</description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>TokenTextFeatureName</name>

+				<description></description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>SpanFeatureStructure</name>

+				<description>

+					Type of annotation which corresponds to spans of

+					data for processing (e.g. a Sentence)

+				</description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>true</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>OrderIndependentLookup</name>

+				<description>

+					True if should ignore element order during lookup

+					(i.e., "top box" would equal "box top"). Default is

+					False.

+				</description>

+				<type>Boolean</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>TokenTypeFeatureName</name>

+				<description>

+					Name of feature used when doing lookups against

+					IncludedTokenTypes and ExcludedTokenTypes

+				</description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>IncludedTokenTypes</name>

+				<description>

+					Type of tokens to include in lookups (if not

+					supplied, then all types are included except those

+					specifically mentioned in ExcludedTokenTypes)

+				</description>

+				<type>Integer</type>

+				<multiValued>true</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>ExcludedTokenTypes</name>

+				<description></description>

+				<type>Integer</type>

+				<multiValued>true</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>ExcludedTokenClasses</name>

+				<description>

+					Class of tokens to exclude from lookups (if not

+					supplied, then all classes are excluded except those

+					specifically mentioned in IncludedTokenClasses,

+					unless IncludedTokenClasses is not supplied, in

+					which case none are excluded)

+				</description>

+				<type>String</type>

+				<multiValued>true</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>IncludedTokenClasses</name>

+				<description>

+					Class of tokens to include in lookups (if not

+					supplied, then all classes are included except those

+					specifically mentioned in ExcludedTokenClasses)

+				</description>

+				<type>String</type>

+				<multiValued>true</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>TokenClassWriteBackFeatureNames</name>

+				<description>

+					names of features that should be written back to a

+					token, such as a POS tag

+				</description>

+				<type>String</type>

+				<multiValued>true</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>ResultingAnnotationMatchedTextFeature</name>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>PrintDictionary</name>

+				<type>Boolean</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>SearchStrategy</name>

+				<description>

+					Can be either "SkipAnyMatch",

+					"SkipAnyMatchAllowOverlap" or

+					"ContiguousMatch"&#13;&#13;ContiguousMatch: longest

+					match of contiguous tokens within enclosing

+					span(taking into account included/excluded items).

+					DEFAULT strategy &#13;SkipAnyMatch: longest match of

+					not-necessarily contiguous tokens within enclosing

+					span (taking into account included/excluded items).

+					Subsequent lookups begin in span after complete

+					match. IMPLIES order-independent lookup

+					&#13;SkipAnyMatchAllowOverlap: longest match of

+					not-necessarily contiguous tokens within enclosing

+					span (taking into account included/excluded items).

+					Subsequent lookups begin in span after next token.

+					IMPLIES order-independent lookup

+				</description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>StopWords</name>

+				<type>String</type>

+				<multiValued>true</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>FindAllMatches</name>

+				<type>Boolean</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>MatchedTokensFeatureName</name>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>ReplaceCommaWithAND</name>

+				<type>Boolean</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>TokenizerDescriptorPath</name>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>true</mandatory>

+			</configurationParameter>

+			<configurationParameter>

+				<name>LanguageID</name>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+		</configurationParameters>

+		<configurationParameterSettings>

+			<nameValuePair>

+				<name>caseMatch</name>

+				<value>

+					<string>ignoreall</string>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>AttributeList</name>

+				<value>

+					<array>

+						<string>canonical</string>

+					</array>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>FeatureList</name>

+				<value>

+					<array>

+						<string>DictCanon</string>

+					</array>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>TokenAnnotation</name>

+				<value>

+					<string>uima.tt.TokenAnnotation</string>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>ResultingAnnotationName</name>

+				<value>

+					<string>

+						org.apache.uima.conceptMapper.DictTerm

+					</string>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>SpanFeatureStructure</name>

+				<value>

+					<string>uima.tcas.DocumentAnnotation</string>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>OrderIndependentLookup</name>

+				<value>

+					<boolean>false</boolean>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>TokenClassWriteBackFeatureNames</name>

+				<value>

+					<array />

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>IncludedTokenClasses</name>

+				<value>

+					<array />

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>PrintDictionary</name>

+				<value>

+					<boolean>false</boolean>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>FindAllMatches</name>

+				<value>

+					<boolean>false</boolean>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>StopWords</name>

+				<value>

+					<array />

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>ReplaceCommaWithAND</name>

+				<value>

+					<boolean>false</boolean>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>TokenizerDescriptorPath</name>

+				<value>

+					<string>

+						/OtherStuff/IBM/eclipse-UIMAsandbox/ConceptMapper/desc/analysis_engine/primitive/OffsetTokenizer.xml

+					</string>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>ResultingEnclosingSpanName</name>

+				<value>

+					<string>enclosingSpan</string>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>MatchedTokensFeatureName</name>

+				<value>

+					<string>matchedTokens</string>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>ResultingAnnotationMatchedTextFeature</name>

+				<value>

+					<string>matchedText</string>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>SearchStrategy</name>

+				<value>

+					<string>ContiguousMatch</string>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>LanguageID</name>

+				<value>

+					<string>en</string>

+				</value>

+			</nameValuePair>

+		</configurationParameterSettings>

+		<typeSystemDescription>

+			<imports>

+				<import name="org.apache.uima.conceptMapper.DictTerm" />

+				<import

+					name="org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation" />

+			</imports>

+			<types>

+				<typeDescription>

+					<name>uima.tt.TokenAnnotation</name>

+					<description></description>

+					<supertypeName>uima.tcas.Annotation</supertypeName>

+					<features>

+						<featureDescription>

+							<name>SemClass</name>

+							<description>

+								semantic class of token

+							</description>

+							<rangeTypeName>

+								uima.cas.String

+							</rangeTypeName>

+						</featureDescription>

+						<featureDescription>

+							<name>POS</name>

+							<description>

+								Part of speech of the term of which this

+								token is a part

+							</description>

+							<rangeTypeName>

+								uima.cas.String

+							</rangeTypeName>

+						</featureDescription>

+						<featureDescription>

+							<name>frost_TokenType</name>

+							<description></description>

+							<rangeTypeName>

+								uima.cas.Integer

+							</rangeTypeName>

+						</featureDescription>

+					</features>

+				</typeDescription>

+			</types>

+		</typeSystemDescription>

+		<typePriorities>

+			<priorityList>

+				<!-- <type>uima.tt.SentenceAnnotation</type> -->

+				<type>uima.tt.TokenAnnotation</type>

+			</priorityList>

+		</typePriorities>

+		<fsIndexCollection />

+		<capabilities>

+			<capability>

+				<inputs>

+					<type allAnnotatorFeatures="true">

+						uima.tt.TokenAnnotation

+					</type>

+					<!-- <type allAnnotatorFeatures="true">uima.tt.SentenceAnnotation</type>

+						<type allAnnotatorFeatures="true">uima.tt.ParagraphAnnotation</type> -->

+				</inputs>

+				<outputs>

+					<type allAnnotatorFeatures="true">

+						org.apache.uima.conceptMapper.DictTerm

+					</type>

+					<type allAnnotatorFeatures="true">

+						uima.tt.TokenAnnotation

+					</type>

+					<type allAnnotatorFeatures="true">

+						org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation

+					</type>

+					<type allAnnotatorFeatures="true">

+						uima.tcas.DocumentAnnotation

+					</type>

+				</outputs>

+				<languagesSupported />

+			</capability>

+		</capabilities>

+		<operationalProperties>

+			<modifiesCas>true</modifiesCas>

+			<multipleDeploymentAllowed>true</multipleDeploymentAllowed>

+			<outputsNewCASes>false</outputsNewCASes>

+		</operationalProperties>

+	</analysisEngineMetaData>

+	<externalResourceDependencies>

+		<externalResourceDependency>

+			<key>DictionaryFile</key>

+			<description>dictionary file loader.</description>

+			<interfaceName>

+				org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource

+			</interfaceName>

+			<optional>false</optional>

+		</externalResourceDependency>

+	</externalResourceDependencies>

+	<resourceManagerConfiguration>

+		<externalResources>

+			<externalResource>

+				<name>DictionaryFileName</name>

+				<description>

+					A file containing the dictionary. Modify this URL to

+					use a different dictionary.

+				</description>

+				<fileResourceSpecifier>

+					<fileUrl>file:dict/testDict.xml</fileUrl>

+				</fileResourceSpecifier>

+				<implementationName>

+					org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource_impl

+				</implementationName>

+			</externalResource>

+		</externalResources>

+		<externalResourceBindings>

+			<externalResourceBinding>

+				<key>DictionaryFile</key>

+				<resourceName>DictionaryFileName</resourceName>

+			</externalResourceBinding>

+		</externalResourceBindings>

+	</resourceManagerConfiguration>

+</taeDescription>


diff --git a/ConceptMapper/src/main/resources/analysis_engine/primitive/DictTerm.xml b/ConceptMapper/src/main/resources/analysis_engine/primitive/DictTerm.xml
new file mode 100644
index 0000000..8740777
--- /dev/null
+++ b/ConceptMapper/src/main/resources/analysis_engine/primitive/DictTerm.xml

@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>

+<!--

+  Licensed to the Apache Software Foundation (ASF) under one

+  or more contributor license agreements.  See the NOTICE file

+  distributed with this work for additional information

+  regarding copyright ownership.  The ASF licenses this file

+  to you under the Apache License, Version 2.0 (the

+  "License"); you may not use this file except in compliance

+  with the License.  You may obtain a copy of the License at

+  

+  http://www.apache.org/licenses/LICENSE-2.0

+  

+  Unless required by applicable law or agreed to in writing,

+  software distributed under the License is distributed on an

+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

+  KIND, either express or implied.  See the License for the

+  specific language governing permissions and limitations

+  under the License.    

+-->  

+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">

+	<name>DictTerm</name>

+	<version>1</version>

+	<vendor>IBM</vendor>

+	<types>

+		<typeDescription>

+			<name>org.apache.uima.conceptMapper.DictTerm</name>

+			<description>Annotation for dictionary lookup matches</description>

+			<supertypeName>uima.tcas.Annotation</supertypeName>

+			<features>

+				<featureDescription>

+					<name>DictCanon</name>

+					<description>canonical form</description>

+					<rangeTypeName>uima.cas.String</rangeTypeName>

+				</featureDescription>

+				<featureDescription>

+					<name>enclosingSpan</name>

+					<description>span that this DictTerm is contained within (i.e. its sentence)</description>

+					<rangeTypeName>uima.tcas.Annotation</rangeTypeName>

+				</featureDescription>

+				<featureDescription>

+					<name>matchedText</name>

+					<description></description>

+					<rangeTypeName>uima.cas.String</rangeTypeName>

+				</featureDescription>

+				<featureDescription>

+					<name>matchedTokens</name>

+					<description></description>

+					<rangeTypeName>uima.cas.FSArray</rangeTypeName>

+				</featureDescription>

+			</features>

+		</typeDescription>

+	</types>

+</typeSystemDescription>


diff --git a/ConceptMapper/src/main/resources/analysis_engine/primitive/OffsetTokenizer.xml b/ConceptMapper/src/main/resources/analysis_engine/primitive/OffsetTokenizer.xml
new file mode 100644
index 0000000..9e3e735
--- /dev/null
+++ b/ConceptMapper/src/main/resources/analysis_engine/primitive/OffsetTokenizer.xml

@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>

+<!--

+  Licensed to the Apache Software Foundation (ASF) under one

+  or more contributor license agreements.  See the NOTICE file

+  distributed with this work for additional information

+  regarding copyright ownership.  The ASF licenses this file

+  to you under the Apache License, Version 2.0 (the

+  "License"); you may not use this file except in compliance

+  with the License.  You may obtain a copy of the License at

+  

+  http://www.apache.org/licenses/LICENSE-2.0

+  

+  Unless required by applicable law or agreed to in writing,

+  software distributed under the License is distributed on an

+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

+  KIND, either express or implied.  See the License for the

+  specific language governing permissions and limitations

+  under the License.    

+-->  

+<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">

+	<frameworkImplementation>org.apache.uima.java</frameworkImplementation>

+	<primitive>true</primitive>

+	<annotatorImplementationName>org.apache.uima.conceptMapper.support.tokenizer.OffsetTokenizer</annotatorImplementationName>

+	<analysisEngineMetaData>

+		<name>OffsetTokenenizer</name>

+		<configurationParameters>

+			

+			<configurationParameter>

+				<name>caseMatch</name>

+				<description>matching case sensitive or case insensitive</description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>true</mandatory>

+			</configurationParameter>

+			

+			<configurationParameter>

+				<name>tokenDelimiters</name>

+				<description>String of characters that separate tokens</description>

+				<type>String</type>

+				<multiValued>false</multiValued>

+				<mandatory>false</mandatory>

+			</configurationParameter>

+			

+		</configurationParameters>

+		

+		<configurationParameterSettings>

+			<nameValuePair>

+				<name>caseMatch</name>

+				<value>

+					<string>ignoreall</string>

+				</value>

+			</nameValuePair>

+			<nameValuePair>

+				<name>tokenDelimiters</name>

+				<value>

+					<string>

+						/-*&amp;@(){}|[]&gt;&lt;\'`":;,$%+.?!

+					</string>

+				</value>

+			</nameValuePair>

+		</configurationParameterSettings>

+		

+		<typeSystemDescription>

+			<types>

+				<typeDescription>

+					<name>org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation</name>

+					<description />

+					<supertypeName>uima.tt.TokenAnnotation</supertypeName>

+					<features>

+						<featureDescription>

+							<name>text</name>

+							<description></description>

+							<rangeTypeName>uima.cas.String</rangeTypeName>

+						</featureDescription>

+					</features>

+				</typeDescription>

+				<typeDescription>

+					<name>uima.tt.TokenAnnotation</name>

+					<description />

+					<supertypeName>uima.tcas.Annotation</supertypeName>

+				</typeDescription>

+			</types>

+		</typeSystemDescription>

+		<capabilities>

+			<capability>

+				<inputs />

+				<outputs>

+					<type allAnnotatorFeatures="true">org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation</type>

+					<type allAnnotatorFeatures="true">uima.tt.TokenAnnotation</type>

+					<type allAnnotatorFeatures="true">uima.tcas.DocumentAnnotation</type>

+				</outputs>

+				<languagesSupported />

+			</capability>

+		</capabilities>

+		<operationalProperties>

+			<modifiesCas>true</modifiesCas>

+			<multipleDeploymentAllowed>true</multipleDeploymentAllowed>

+			<outputsNewCASes>false</outputsNewCASes>

+		</operationalProperties>

+	</analysisEngineMetaData>

+</taeDescription>


diff --git a/ConceptMapper/src/main/resources/collection_processing_engines/TestConceptMapperCPE.xml b/ConceptMapper/src/main/resources/collection_processing_engines/TestConceptMapperCPE.xml
new file mode 100644
index 0000000..d5042af
--- /dev/null
+++ b/ConceptMapper/src/main/resources/collection_processing_engines/TestConceptMapperCPE.xml

@@ -0,0 +1,76 @@
+<?xml version="1.0" encoding="UTF-8"?>

+<!--

+  Licensed to the Apache Software Foundation (ASF) under one

+  or more contributor license agreements.  See the NOTICE file

+  distributed with this work for additional information

+  regarding copyright ownership.  The ASF licenses this file

+  to you under the Apache License, Version 2.0 (the

+  "License"); you may not use this file except in compliance

+  with the License.  You may obtain a copy of the License at

+  

+  http://www.apache.org/licenses/LICENSE-2.0

+  

+  Unless required by applicable law or agreed to in writing,

+  software distributed under the License is distributed on an

+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

+  KIND, either express or implied.  See the License for the

+  specific language governing permissions and limitations

+  under the License.    

+-->

+<cpeDescription xmlns="http://uima.apache.org/resourceSpecifier">

+  <collectionReader>

+    <collectionIterator>

+      <descriptor>

+        <import location="../../../com.ibm.bluej.core/descriptors/com/ibm/bluej/core/xmi_zip_reader/XmiZipReader.xml"/>

+      </descriptor>

+      <configurationParameterSettings>

+        <nameValuePair>

+          <name>InputFile</name>

+          <value>

+            <string>/data5/BlueJShared/BlueJ_01/questions/30001-30010.zip</string>

+          </value>

+        </nameValuePair>

+      </configurationParameterSettings>

+    </collectionIterator>

+  </collectionReader>

+  <casProcessors casPoolSize="3" processingUnitThreadCount="1">

+    <casProcessor deployment="integrated" name="DictMatcher">

+      <descriptor>

+        <import location="../analysis_engine/aggregate/JFrostTokenizerMatcher.xml"/>

+      </descriptor>

+      <deploymentParameters/>

+      <errorHandling>

+        <errorRateThreshold action="terminate" value="0/1000"/>

+        <maxConsecutiveRestarts action="terminate" value="30"/>

+        <timeout max="100000" default="-1"/>

+      </errorHandling>

+      <checkpoint batch="10000" time="1000ms"/>

+    </casProcessor>

+    <casProcessor deployment="integrated" name="XmiZipWriter">

+      <descriptor>

+        <import location="../../../com.ibm.bluej.core/descriptors/com/ibm/bluej/core/xmi_zip_writer/XmiZipWriter.xml"/>

+      </descriptor>

+      <deploymentParameters/>

+      <errorHandling>

+        <errorRateThreshold action="terminate" value="0/1000"/>

+        <maxConsecutiveRestarts action="terminate" value="30"/>

+        <timeout max="100000" default="-1"/>

+      </errorHandling>

+      <checkpoint batch="10000" time="1000ms"/>

+      <configurationParameterSettings>

+        <nameValuePair>

+          <name>OutputFile</name>

+          <value>

+            <string>/tmp/out.zip</string>

+          </value>

+        </nameValuePair>

+      </configurationParameterSettings>

+    </casProcessor>

+  </casProcessors>

+  <cpeConfig>

+    <numToProcess>-1</numToProcess>

+    <deployAs>immediate</deployAs>

+    <checkpoint batch="0" time="300000ms"/>

+    <timerImpl></timerImpl>

+  </cpeConfig>

+</cpeDescription>
\ No newline at end of file
commit	08144aab5832c18f72c5ace487cc7123e40e428f	[log] [tgz]
author	Marshall Schor <schor@apache.org>	Wed Apr 18 17:42:46 2018 +0000
committer	Marshall Schor <schor@apache.org>	Wed Apr 18 17:42:46 2018 +0000
tree	cc0a1e7fb4143ce88b0d68d9163858998d6e8459
parent	92040bbb73997a1d356c98551e6da64acef5955e [diff]