moved from sandbox
diff --git a/modelbuilder-addon/pom.xml b/modelbuilder-addon/pom.xml
new file mode 100644
index 0000000..4a9c886
--- /dev/null
+++ b/modelbuilder-addon/pom.xml
@@ -0,0 +1,35 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+ <parent>
+    <groupId>org.apache.opennlp</groupId>
+    <artifactId>opennlp</artifactId>
+    <version>1.6.0-SNAPSHOT</version>
+    <relativePath>../opennlp/pom.xml</relativePath>
+  </parent>
+  <!-- No groupId is declared here, so it is inherited from the parent POM. -->
+  <artifactId>modelbuilder-addon</artifactId>
+  <version>1.0-SNAPSHOT</version>
+  <packaging>jar</packaging>
+
+  <name>modelbuilder-addon</name>
+  <url>http://maven.apache.org</url>
+  <!-- Source files are read as UTF-8 by the build. -->
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+  <!-- junit is test scoped only; opennlp-tools supplies the name finder classes used by the add-on sources. -->
+  <dependencies>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>3.8.1</version>
+      <scope>test</scope>
+    </dependency>
+      <dependency>
+      <groupId>org.apache.opennlp</groupId>
+      <artifactId>opennlp-tools</artifactId>
+      <version>1.6.0-SNAPSHOT</version>
+    </dependency>
+  </dependencies>
+</project>

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
new file mode 100644
index 0000000..81ff9fd
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
@@ -0,0 +1,117 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder;

+

+import java.io.File;

+import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;

+import opennlp.addons.modelbuilder.impls.FileKnownEntityProvider;

+import opennlp.addons.modelbuilder.impls.FileModelValidatorImpl;

+import opennlp.addons.modelbuilder.impls.FileSentenceProvider;

+import opennlp.addons.modelbuilder.impls.GenericModelGenerator;

+import opennlp.addons.modelbuilder.impls.GenericModelableImpl;

+

+/**
+ * Convenience entry point that wires the file based implementations together
+ * to produce an NER model from user defined data.
+ * <p>
+ * The basic processing is:
+ * <ol>
+ * <li>read in the list of known entities</li>
+ * <li>annotate the sentences based on the list of known entities</li>
+ * <li>create a model from the annotations</li>
+ * <li>perform NER with the model on the sentences</li>
+ * <li>add the NER results to the annotations</li>
+ * <li>rebuild the model</li>
+ * <li>loop</li>
+ * </ol>
+ * <p>
+ * This class only assembles the collaborators: {@link FileSentenceProvider},
+ * {@link FileKnownEntityProvider}, {@link FileModelValidatorImpl} and
+ * {@link GenericModelableImpl} all receive the same
+ * {@link BaseModelBuilderParams}, and {@link GenericModelGenerator} runs the
+ * iterations.
+ */
+public class DefaultModelBuilderUtil {
+
+  /**
+   * Builds a name finder model by handing the file based collaborators to
+   * {@link GenericModelGenerator#build}.
+   * <p>
+   * Nothing is returned; the collaborators are expected to write to the files named by the arguments.
+   *
+   * @param sentences                file with one sentence per line; there
+   *                                 should be at least 15K sentences forming a
+   *                                 representative sample of the user data
+   * @param knownEntities            file holding a plain list of unambiguous
+   *                                 entities, one entry per line. When building
+   *                                 a person model, for instance, this is a
+   *                                 list of unambiguous person names that are
+   *                                 known to occur in the sentences file
+   * @param knownEntitiesBlacklist   file listing known bad hits that the NER
+   *                                 phase might catch early on, before the
+   *                                 model iterates to maturity
+   * @param modelOutFile             location the model is written to
+   * @param annotatedSentenceOutFile location the annotated sentences produced
+   *                                 by this process are written to
+   * @param namedEntityType          type of entity, for example person,
+   *                                 location or organization
+   * @param iterations               number of times to repeat the loop of
+   *                                 annotation, model generation and NER
+   */
+  public static void generateModel(File sentences, File knownEntities, File knownEntitiesBlacklist,
+          File modelOutFile, File annotatedSentenceOutFile, String namedEntityType, int iterations) {
+    // A single parameter object is shared by every collaborator created below.
+    BaseModelBuilderParams builderParams = new BaseModelBuilderParams();
+    builderParams.setSentenceFile(sentences);
+    builderParams.setKnownEntitiesFile(knownEntities);
+    builderParams.setKnownEntityBlacklist(knownEntitiesBlacklist);
+    builderParams.setModelFile(modelOutFile);
+    builderParams.setAnnotatedTrainingDataFile(annotatedSentenceOutFile);
+    builderParams.setEntityType(namedEntityType);
+
+    // Sentence providers feed the process with sentences derived from user
+    // data; this implementation simply reads a file line by line.
+    SentenceProvider sentenceSource = new FileSentenceProvider();
+    sentenceSource.setParameters(builderParams);
+
+    // Known entity providers supply the seed list of entities, such as
+    // "Barack Obama" for person or "Germany" for location. Seeds should be
+    // prolific, non ambiguous names.
+    KnownEntityProvider seedEntities = new FileKnownEntityProvider();
+    seedEntities.setParameters(builderParams);
+
+    // Validators weed out bad hits between iterations of the name finder.
+    // Because every iteration feeds the next one, the name finder gets more
+    // and more greedy if bad entities are let in; the validator is the hook
+    // for throwing out obviously bad hits. A good implementation might check
+    // that a location really sits inside a noun phrase; users can make it as
+    // specific as their data and use case require.
+    ModelGenerationValidator hitValidator = new FileModelValidatorImpl();
+    hitValidator.setParameters(builderParams);
+
+    // The Modelable writes and reads the annotated sentences, and creates and
+    // writes the NER models.
+    Modelable modelWriter = new GenericModelableImpl();
+    modelWriter.setParameters(builderParams);
+
+    // The generator runs the process for a fixed number of iterations. It
+    // could be improved by measuring the difference between runs and stopping
+    // at a threshold, but for extremely large sentence sets that may be too
+    // expensive.
+    SemiSupervisedModelGenerator generator = new GenericModelGenerator();
+    generator.build(sentenceSource, seedEntities, hitValidator, modelWriter, iterations);
+  }
+}

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
new file mode 100644
index 0000000..694250e
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
@@ -0,0 +1,45 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder;

+

+import java.util.Set;

+

+

+

+/**
+ * Supplies the set of known entities (a list of names or locations, for
+ * example) that the model building process starts from.
+ */
+public interface KnownEntityProvider extends ModelParameter<opennlp.addons.modelbuilder.impls.BaseModelBuilderParams> {
+
+  /**
+   * Returns the known, non ambiguous entities.
+   * @return a set of entities
+   */
+  Set<String> getKnownEntities();
+
+  /**
+   * Adds to the set of known entities. Implementing classes should hold the set at class level.
+   * @param unambiguousEntity the entity to add
+   */
+  void addKnownEntity(String unambiguousEntity);
+
+  /**
+   * Defines the type of entity that the set contains, i.e. person, location or organization.
+   * @return the entity type
+   */
+  String getKnownEntitiesType();
+}

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
new file mode 100644
index 0000000..4bd5fe2
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
@@ -0,0 +1,33 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder;

+

+import java.util.Collection;

+

+/**
+ * Validates results from the iterative name finding. The {@code Boolean}
+ * results are unboxed by the generator, so implementations must not return null.
+ */
+public interface ModelGenerationValidator extends ModelParameter<opennlp.addons.modelbuilder.impls.BaseModelBuilderParams> {
+  /** Tells whether the name finder should be run on {@code sentence}. */
+  Boolean validSentence(String sentence);
+
+  /** Tells whether {@code namedEntity} may be kept as a known entity and annotation. */
+  Boolean validNamedEntity(String namedEntity);
+
+  /** Returns the entities that are treated as known bad hits. */
+  Collection<String> getBlackList();
+}

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java
new file mode 100644
index 0000000..136e775
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java
@@ -0,0 +1,28 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder;

+

+import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;

+

+/**
+ * Common super-interface of the model builder components; gives each one a way to receive its parameters.
+ */
+public interface ModelParameter<T extends  BaseModelBuilderParams>{
+  /** Hands the component the parameters it should work with. */
+  void setParameters(T params);
+  
+
+}

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java
new file mode 100644
index 0000000..80b0170
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java
@@ -0,0 +1,45 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder;

+

+import java.util.Set;

+import opennlp.tools.namefind.TokenNameFinderModel;

+

+/**
+ * Holds the annotated sentences and builds the name finder model from them.
+ * NOTE(review): the contracts below are inferred from how the generator uses them.
+ */
+public interface Modelable extends ModelParameter<opennlp.addons.modelbuilder.impls.BaseModelBuilderParams> {
+  /** Returns {@code sentence} with {@code namedEntity} marked up as an entity of {@code entityType}. */
+  String annotate(String sentence, String namedEntity, String entityType);
+
+  /** Writes the annotated sentences collected so far. */
+  void writeAnnotatedSentences();
+  /** Returns the annotated sentences collected so far. */
+  Set<String> getAnnotatedSentences();
+  /** Replaces the collected annotated sentences. */
+  void setAnnotatedSentences(Set<String> annotatedSentences);
+  /** Adds one annotated sentence to the collection. */
+  void addAnnotatedSentence(String annotatedSentence);
+
+  /** Builds a model for {@code entityType}; afterwards {@link #getModel()} is expected to return it. */
+  void buildModel(String entityType);
+  /** Returns the model that the generator creates its name finder from. */
+  TokenNameFinderModel getModel();
+
+  /** Splits {@code sentence} into the tokens handed to the name finder. */
+  String[] tokenizeSentenceToWords(String sentence);
+}

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java
new file mode 100644
index 0000000..c97a4c1
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java
@@ -0,0 +1,28 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder;

+

+import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;

+

+/**
+ * Drives the iterative, semi-supervised model generation using the supplied
+ * collaborators.
+ */
+public interface SemiSupervisedModelGenerator extends ModelParameter<BaseModelBuilderParams> {
+  /** Runs {@code iterations} rounds of annotation, model generation and NER with the given collaborators. */
+  void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider, 
+          ModelGenerationValidator validator, Modelable modelable, int iterations);
+}

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java
new file mode 100644
index 0000000..5610224
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java
@@ -0,0 +1,27 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder;

+

+import java.util.Set;

+import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;

+

+/**
+ * Supplies the sentences that the model building process annotates and runs the name finder on.
+ */
+public interface SentenceProvider extends ModelParameter<BaseModelBuilderParams> {
+  /** Returns the sentences to process. */
+  Set<String> getSentences();
+}

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
new file mode 100644
index 0000000..fcb2384
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
@@ -0,0 +1,90 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder.impls;

+

+import java.io.File;

+import java.util.Map;

+

+/**
+ * Parameter object that is passed through the model building process. All
+ * properties start out null, except the additional parameters (never null).
+ */
+public class BaseModelBuilderParams {
+
+  private File modelFile; // where the trained model is written
+  private File sentenceFile; // input, one sentence per line
+  private File knownEntitiesFile; // input, one known entity per line
+  private File knownEntityBlacklist; // input, known bad hits
+  private File annotatedTrainingDataFile; // where annotated sentences are written
+  private String entityType; // e.g. person, location, organization
+  private Map<String, String> additionalParams = new java.util.HashMap<String, String>();
+
+  public File getModelFile() {
+    return modelFile;
+  }
+
+  public void setModelFile(File modelFile) {
+    this.modelFile = modelFile;
+  }
+
+  public File getSentenceFile() {
+    return sentenceFile;
+  }
+
+  public void setSentenceFile(File sentenceFile) {
+    this.sentenceFile = sentenceFile;
+  }
+
+  public File getKnownEntitiesFile() {
+    return knownEntitiesFile;
+  }
+
+  public void setKnownEntitiesFile(File knownEntitiesFile) {
+    this.knownEntitiesFile = knownEntitiesFile;
+  }
+
+  public File getKnownEntityBlacklist() {
+    return knownEntityBlacklist;
+  }
+
+  public void setKnownEntityBlacklist(File knownEntityBlacklist) {
+    this.knownEntityBlacklist = knownEntityBlacklist;
+  }
+
+  public File getAnnotatedTrainingDataFile() {
+    return annotatedTrainingDataFile;
+  }
+
+  public void setAnnotatedTrainingDataFile(File annotatedTrainingDataFile) {
+    this.annotatedTrainingDataFile = annotatedTrainingDataFile;
+  }
+
+  public String getEntityType() {
+    return entityType;
+  }
+
+  public void setEntityType(String entityType) {
+    this.entityType = entityType;
+  }
+
+  public Map<String, String> getAdditionalParams() {
+    return additionalParams;
+  }
+  /** Sets the additional parameters; null is replaced by an empty map so the getter never returns null. */
+  public void setAdditionalParams(Map<String, String> additionalParams) {
+    this.additionalParams = additionalParams != null ? additionalParams : new java.util.HashMap<String, String>();
+  }
+}
\ No newline at end of file
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
new file mode 100644
index 0000000..0de043c
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
@@ -0,0 +1,82 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder.impls;

+

+import java.io.BufferedReader;

+import java.io.FileInputStream;

+import java.io.FileNotFoundException;

+import java.io.IOException;

+import java.io.InputStream;

+import java.io.InputStreamReader;

+import java.nio.charset.Charset;

+import java.util.HashSet;

+import java.util.Set;

+import java.util.logging.Level;

+import java.util.logging.Logger;

+import opennlp.addons.modelbuilder.KnownEntityProvider;

+

+/**
+ * Reads the known entities from the known entities file, one entity per line.
+ */
+public class FileKnownEntityProvider implements KnownEntityProvider {
+
+  Set<String> knownEntities = new HashSet<String>();
+  BaseModelBuilderParams params;
+
+  /**
+   * Returns the known entities, reading the file (as UTF-8) whenever the set
+   * is still empty. Blank lines are skipped because an empty entity is
+   * contained in every sentence. The reader is always closed; I/O failures
+   * are logged and the entities read so far are returned.
+   */
+  @Override
+  public Set<String> getKnownEntities() {
+    if (knownEntities.isEmpty()) {
+      try {
+        BufferedReader br = new BufferedReader(new InputStreamReader(
+                new FileInputStream(params.getKnownEntitiesFile()), Charset.forName("UTF-8")));
+        try {
+          String line;
+          while ((line = br.readLine()) != null) {
+            if (!line.trim().isEmpty()) {
+              knownEntities.add(line);
+            }
+          }
+        } finally {
+          br.close();
+        }
+      } catch (IOException ex) {
+        Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
+      }
+    }
+    return knownEntities;
+  }
+
+  @Override
+  public void addKnownEntity(String unambiguousEntity) {
+    knownEntities.add(unambiguousEntity);
+  }
+
+  @Override
+  public String getKnownEntitiesType() {
+    return params.getEntityType();
+  }
+
+  @Override
+  public void setParameters(BaseModelBuilderParams params) {
+    this.params = params;
+  }
+}

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
new file mode 100644
index 0000000..ea4bb05
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
@@ -0,0 +1,96 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder.impls;

+

+import java.io.BufferedReader;

+import java.io.FileInputStream;

+import java.io.FileNotFoundException;

+import java.io.IOException;

+import java.io.InputStream;

+import java.io.InputStreamReader;

+import java.nio.charset.Charset;

+import java.util.Collection;

+import java.util.HashSet;

+import java.util.Set;

+import java.util.logging.Level;

+import java.util.logging.Logger;

+import opennlp.addons.modelbuilder.ModelGenerationValidator;

+

+/**
+ * Validates NER results against a blacklist file before they are included
+ * in the model. Blacklist entries are matched case-insensitively.
+ */
+public class FileModelValidatorImpl implements ModelGenerationValidator {
+
+  private Set<String> badentities = new HashSet<String>();
+  BaseModelBuilderParams params;
+
+  @Override
+  public void setParameters(BaseModelBuilderParams params) {
+    this.params = params;
+  }
+
+  /**
+   * Accepts every sentence, because the sentence provider is expected to
+   * return only "valid" sentences in this case.
+   */
+  @Override
+  public Boolean validSentence(String sentence) {
+    return true;
+  }
+
+  /**
+   * Returns false if the lower-cased {@code namedEntity} is on the blacklist
+   * and true otherwise. The blacklist is loaded first while it is still empty.
+   */
+  @Override
+  public Boolean validNamedEntity(String namedEntity) {
+    if (badentities.isEmpty()) {
+      getBlackList();
+    }
+    return !badentities.contains(namedEntity.toLowerCase());
+  }
+
+  /**
+   * Returns the blacklist, reading it from the blacklist file (as UTF-8) while
+   * the set is still empty and a file is configured. Entries are stored
+   * lower-cased to match the lookup in {@link #validNamedEntity(String)};
+   * blank lines are skipped. I/O failures are logged, not thrown.
+   */
+  @Override
+  public Collection<String> getBlackList() {
+    if (params.getKnownEntityBlacklist() == null || !badentities.isEmpty()) {
+      return badentities;
+    }
+    try {
+      BufferedReader br = new BufferedReader(new InputStreamReader(
+              new FileInputStream(params.getKnownEntityBlacklist()), Charset.forName("UTF-8")));
+      try {
+        String line;
+        while ((line = br.readLine()) != null) {
+          if (!line.trim().isEmpty()) {
+            badentities.add(line.toLowerCase());
+          }
+        }
+      } finally {
+        br.close();
+      }
+    } catch (IOException ex) {
+      Logger.getLogger(FileModelValidatorImpl.class.getName()).log(Level.SEVERE, null, ex);
+    }
+    return badentities;
+  }
+}

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
new file mode 100644
index 0000000..bea55f5
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
@@ -0,0 +1,70 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder.impls;

+

+import java.io.BufferedReader;

+import java.io.FileInputStream;

+import java.io.FileNotFoundException;

+import java.io.IOException;

+import java.io.InputStream;

+import java.io.InputStreamReader;

+import java.nio.charset.Charset;

+import java.util.HashSet;

+import java.util.Set;

+import java.util.logging.Level;

+import java.util.logging.Logger;

+import opennlp.addons.modelbuilder.SentenceProvider;

+

+/**
+ * Provides user sentences via a simple text file, one sentence per line.
+ */
+public class FileSentenceProvider implements SentenceProvider {
+
+  BaseModelBuilderParams params;
+  Set<String> sentences = new HashSet<String>();
+
+  /**
+   * Returns the sentences, reading the sentence file (as UTF-8) whenever the
+   * set is still empty. The reader is closed even if reading fails; I/O
+   * failures are logged and the sentences read so far are returned.
+   * @return the sentences held by this provider; never null
+   */
+  @Override
+  public Set<String> getSentences() {
+    if (sentences.isEmpty()) {
+      try {
+        BufferedReader br = new BufferedReader(new InputStreamReader(
+                new FileInputStream(params.getSentenceFile()), Charset.forName("UTF-8")));
+        try {
+          String line;
+          while ((line = br.readLine()) != null) {
+            sentences.add(line);
+          }
+        } finally {
+          br.close();
+        }
+      } catch (IOException ex) {
+        Logger.getLogger(FileSentenceProvider.class.getName()).log(Level.SEVERE, null, ex);
+      }
+    }
+    return sentences;
+  }
+
+  @Override
+  public void setParameters(BaseModelBuilderParams params) {
+    this.params = params;
+  }
+}

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
new file mode 100644
index 0000000..bbd23e1
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
@@ -0,0 +1,103 @@
+/*

+ * Copyright 2013 The Apache Software Foundation.

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *      http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.addons.modelbuilder.impls;

+

+import java.util.HashMap;

+import java.util.Map;

+import opennlp.addons.modelbuilder.KnownEntityProvider;

+import opennlp.addons.modelbuilder.ModelGenerationValidator;

+import opennlp.addons.modelbuilder.Modelable;

+import opennlp.addons.modelbuilder.SemiSupervisedModelGenerator;

+import opennlp.addons.modelbuilder.SentenceProvider;

+import opennlp.tools.namefind.NameFinderME;

+import opennlp.tools.util.Span;

+

+/**
+ * Generic implementation that drives the whole process through the supplied collaborators.
+ */
+public class GenericModelGenerator implements SemiSupervisedModelGenerator {
+  private Map<String, String> params = new HashMap<String, String>();
+
+  /** Keeps the additional parameters; an empty map is used when there are none. */
+  @Override
+  public void setParameters(BaseModelBuilderParams params) {
+    this.params = params.getAdditionalParams() != null ? params.getAdditionalParams() : new HashMap<String, String>();
+  }
+
+  /**
+   * Runs {@code iterations} rounds of: annotate sentences containing a known
+   * entity, write the annotations, build a model, run the name finder and feed
+   * valid hits back in. Afterwards writes and builds one final time, unless it
+   * returned early because there were no sentences or no known entities.
+   */
+  @Override
+  public void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider,
+          ModelGenerationValidator validator, Modelable modelable, int iterations) {
+    for (int iteration = 0; iteration < iterations; iteration++) {
+      System.out.println("ITERATION: " + iteration);
+      System.out.println("\tPerfoming Known Entity Annotation");
+      System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
+      System.out.println("\t\treading data....: ");
+      // Check for empty input before the sentence x entity scan instead of after it.
+      if (sentenceProvider.getSentences().isEmpty()) {
+        System.out.println("No sentences in file");
+        return;
+      }
+      if (knownEntityProvider.getKnownEntities().isEmpty()) {
+        System.out.println("No known entities in file");
+        return;
+      }
+      for (String sentence : sentenceProvider.getSentences()) {
+        for (String knownEntity : knownEntityProvider.getKnownEntities()) {
+          if (sentence.contains(knownEntity)) {
+            //if the same sentence has multiple hits should they be annotated separately?
+            modelable.addAnnotatedSentence(modelable.annotate(sentence, knownEntity, knownEntityProvider.getKnownEntitiesType()));
+          }
+        }
+      }
+      System.out.println("\t\twriting annotated sentences....: ");
+      modelable.writeAnnotatedSentences();
+      System.out.println("\t\tbuilding model.... ");
+      modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
+      System.out.println("\t\tmodel building complete.... ");
+      NameFinderME nf = new NameFinderME(modelable.getModel());
+      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
+      System.out.println("\tPerforming NER with new model");
+      System.out.println("\t\tPrinting NER Results. Add undesired results to the blacklist file and start over");
+      for (String sentence : sentenceProvider.getSentences()) {
+        if (!validator.validSentence(sentence)) {
+          continue;
+        }
+        String[] tokens = modelable.tokenizeSentenceToWords(sentence);
+        Span[] find = nf.find(tokens);
+        nf.clearAdaptiveData();
+        for (String namedEntity : Span.spansToStrings(find, tokens)) {
+          System.out.println("\t\t" + namedEntity);
+          if (validator.validNamedEntity(namedEntity)) {
+            knownEntityProvider.addKnownEntity(namedEntity);
+            modelable.addAnnotatedSentence(modelable.annotate(sentence, namedEntity, knownEntityProvider.getKnownEntitiesType()));
+          } else {
+            System.out.println("\t\t" + namedEntity + "...already blacklisted");
+          }
+        }
+      }
+      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
+      System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
+    }
+    modelable.writeAnnotatedSentences();
+    modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
+  }
+}

diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
new file mode 100644
index 0000000..572e84b
--- /dev/null
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder.impls;
+
+import java.io.BufferedOutputStream;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.addons.modelbuilder.Modelable;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.NameSample;
+import opennlp.tools.namefind.NameSampleDataStream;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+/**
+ * Creates annotations, writes annotations to file, and creates a model and writes to a file.
+ * <p>
+ * The training data file is written and read as UTF-8. I/O failures are logged
+ * instead of thrown, so callers see them only through the log output.
+ */
+public class GenericModelableImpl implements Modelable {
+
+  private static final Logger LOGGER = Logger.getLogger(GenericModelableImpl.class.getName());
+  /** Charset shared by the training data writer and reader so the two always agree. */
+  private static final Charset UTF8 = Charset.forName("UTF-8");
+
+  private Set<String> annotatedSentences = new HashSet<String>();
+  BaseModelBuilderParams params;
+
+  /**
+   * Stores the parameters that supply the training data file and the model file.
+   *
+   * @param params the parameters used by all file operations of this instance
+   */
+  @Override
+  public void setParameters(BaseModelBuilderParams params) {
+    this.params = params;
+  }
+
+  /**
+   * Marks every occurrence of the entity text in the sentence with training tags.
+   *
+   * @param sentence the sentence to annotate
+   * @param namedEntity the entity text to look for
+   * @param entityType the type written into the START tag
+   * @return the sentence with each occurrence replaced by
+   *         {@code " <START:entityType> namedEntity <END> "}; the sentence itself
+   *         when the entity text is empty
+   */
+  @Override
+  public String annotate(String sentence, String namedEntity, String entityType) {
+    // An empty target matches between every character and would flood the
+    // sentence with tags, so there is nothing sensible to annotate.
+    if (namedEntity.length() == 0) {
+      return sentence;
+    }
+    return sentence.replace(namedEntity, " <START:" + entityType + "> " + namedEntity + " <END> ");
+  }
+
+  /**
+   * Overwrites the training data file with the annotated sentences, one per line.
+   * Line breaks inside a sentence are replaced by spaces. Failures are logged.
+   */
+  @Override
+  public void writeAnnotatedSentences() {
+    try {
+      // FileWriter would use the platform default charset, but buildModel reads
+      // this file as UTF-8; encode explicitly so both sides match.
+      Writer writer = new OutputStreamWriter(
+              new FileOutputStream(params.getAnnotatedTrainingDataFile(), false), UTF8);
+      try {
+        for (String s : annotatedSentences) {
+          writer.write(s.replace("\n", " ").trim() + "\n");
+        }
+      } finally {
+        // Close even when a write fails so the file handle is not leaked.
+        writer.close();
+      }
+    } catch (IOException ex) {
+      LOGGER.log(Level.SEVERE, "Unable to write the annotated training data", ex);
+    }
+  }
+
+  /**
+   * @return the live set of annotated sentences held by this instance
+   */
+  @Override
+  public Set<String> getAnnotatedSentences() {
+    return annotatedSentences;
+  }
+
+  /**
+   * Replaces the set of annotated sentences held by this instance.
+   *
+   * @param annotatedSentences the new set; it is stored as is, not copied
+   */
+  @Override
+  public void setAnnotatedSentences(Set<String> annotatedSentences) {
+    this.annotatedSentences = annotatedSentences;
+  }
+
+  /**
+   * Adds one annotated sentence to the set.
+   *
+   * @param annotatedSentence the sentence to add
+   */
+  @Override
+  public void addAnnotatedSentence(String annotatedSentence) {
+    annotatedSentences.add(annotatedSentence);
+  }
+
+  /**
+   * Trains a name finder model from the training data file and writes it to the
+   * model file. Failures are logged.
+   *
+   * @param entityType the entity type passed to the trainer
+   */
+  @Override
+  public void buildModel(String entityType) {
+    try {
+      System.out.println("\tBuilding Model using " + annotatedSentences.size() + " annotations");
+      System.out.println("\t\treading training data...");
+      ObjectStream<String> lineStream =
+              new PlainTextByLineStream(new FileInputStream(params.getAnnotatedTrainingDataFile()), UTF8);
+      ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream);
+
+      TokenNameFinderModel model;
+      try {
+        model = NameFinderME.train("en", entityType, sampleStream, null);
+      } finally {
+        // Release the training data stream even when training fails.
+        sampleStream.close();
+      }
+      OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(params.getModelFile()));
+      try {
+        model.serialize(modelOut);
+      } finally {
+        modelOut.close();
+      }
+      System.out.println("\tmodel generated");
+    } catch (Exception e) {
+      // Report the failure; swallowing it would leave callers with a missing or
+      // stale model file and no explanation.
+      LOGGER.log(Level.SEVERE, "Unable to build the model for entity type " + entityType, e);
+    }
+  }
+
+  /**
+   * Loads the model from the model file.
+   *
+   * @return the loaded model, or {@code null} when the file cannot be read
+   */
+  @Override
+  public TokenNameFinderModel getModel() {
+    TokenNameFinderModel nerModel = null;
+    try {
+      InputStream modelIn = new FileInputStream(params.getModelFile());
+      try {
+        nerModel = new TokenNameFinderModel(modelIn);
+      } finally {
+        // Close the file whether or not loading succeeded.
+        modelIn.close();
+      }
+    } catch (IOException ex) {
+      LOGGER.log(Level.SEVERE, null, ex);
+    }
+    return nerModel;
+  }
+
+  /**
+   * Splits the sentence into tokens on single space characters.
+   *
+   * @param sentence the sentence to split
+   * @return the tokens; leading or repeated spaces yield empty tokens
+   */
+  @Override
+  public String[] tokenizeSentenceToWords(String sentence) {
+    return sentence.split(" ");
+  }
+}

diff --git a/modelbuilder-addon/src/test/java/modelbuilder/AppTest.java b/modelbuilder-addon/src/test/java/modelbuilder/AppTest.java
new file mode 100644
index 0000000..2b04731
--- /dev/null
+++ b/modelbuilder-addon/src/test/java/modelbuilder/AppTest.java
@@ -0,0 +1,35 @@
+package modelbuilder;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Placeholder JUnit 3 test case for the model builder addon.
+ */
+public class AppTest extends TestCase {
+
+  /**
+   * Creates the test case.
+   *
+   * @param testName name of the test case
+   */
+  public AppTest(String testName) {
+    super(testName);
+  }
+
+  /**
+   * @return a suite holding the tests declared in this class
+   */
+  public static Test suite() {
+    TestSuite suite = new TestSuite(AppTest.class);
+    return suite;
+  }
+
+  /**
+   * Trivial check that always passes.
+   */
+  public void testApp() {
+    assertTrue(true);
+  }
+}