OPENNLP-850 Add NER brat annotation service
diff --git a/mallet-addon/params/crf-params.txt b/mallet-addon/params/crf-params.txt
index 0a2ace3..4873dff 100644
--- a/mallet-addon/params/crf-params.txt
+++ b/mallet-addon/params/crf-params.txt
@@ -15,6 +15,6 @@
 
 # Sample machine learning properties file
 Algorithm=opennlp.addons.mallet.CRFTrainer
-Cutoff=0
+Cutoff=2
 Iterations=100
 
diff --git a/mallet-addon/params/maxent-params.txt b/mallet-addon/params/maxent-params.txt
index d8cf288..74e8922 100644
--- a/mallet-addon/params/maxent-params.txt
+++ b/mallet-addon/params/maxent-params.txt
@@ -14,7 +14,8 @@
 # limitations under the License.
 
 # Sample machine learning properties file
-Algorithm=opennlp.addons.mallet.MaxentTrainer
+#Algorithm=opennlp.addons.mallet.MaxentTrainer
+Algorithm=PERCEPTRON
 Cutoff=0
 Iterations=100
-
+#BeamSize=5
diff --git a/mallet-addon/pom.xml b/mallet-addon/pom.xml
index 38f1fc9..c5f2ca9 100644
--- a/mallet-addon/pom.xml
+++ b/mallet-addon/pom.xml
@@ -33,7 +33,7 @@
 		<dependency>
 			<groupId>org.apache.opennlp</groupId>
 			<artifactId>opennlp-tools</artifactId>
-			<version>1.6.0</version>
+			<version>1.6.1-SNAPSHOT</version>
 		</dependency>
 		
 		<dependency>
diff --git a/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java b/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
index 5772925..7e6de66 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
@@ -127,21 +127,20 @@
     // CRFOptimizableBy* objects (terms in the objective function)
     // objective 1: label likelihood objective
 
-    CRFTrainerByLabelLikelihood crfTrainer = new CRFTrainerByLabelLikelihood(
-        crf);
-    crfTrainer.setGaussianPriorVariance(1.0);
+//    CRFTrainerByLabelLikelihood crfTrainer = new CRFTrainerByLabelLikelihood(crf);
+//    crfTrainer.setGaussianPriorVariance(1.0);
 
-//    CRFOptimizableByLabelLikelihood optLabel = new
-//        CRFOptimizableByLabelLikelihood(crf, trainingData);
-//
+    CRFOptimizableByLabelLikelihood optLabel = new
+        CRFOptimizableByLabelLikelihood(crf, trainingData);
+
 //    // CRF trainer
-//     Optimizable.ByGradientValue[] opts = new Optimizable.ByGradientValue[] {
-//     optLabel };
+     Optimizable.ByGradientValue[] opts = new Optimizable.ByGradientValue[] {
+     optLabel };
 
-    // by default, use L-BFGS as the optimizer
-//     CRFTrainerByValueGradients crfTrainer = new CRFTrainerByValueGradients(
-//     crf, opts);
-//     crfTrainer.setMaxResets(0);
+//     by default, use L-BFGS as the optimizer
+     CRFTrainerByValueGradients crfTrainer = new CRFTrainerByValueGradients(
+     crf, opts);
+     crfTrainer.setMaxResets(0);
 
     // SNIP
 
diff --git a/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java b/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java
index 2967bbc..e9524a9 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java
@@ -27,8 +27,17 @@
 import opennlp.tools.ml.AbstractEventTrainer;
 import opennlp.tools.ml.model.DataIndexer;
 import opennlp.tools.ml.model.MaxentModel;
+import cc.mallet.classify.C45Trainer;
 import cc.mallet.classify.Classifier;
+import cc.mallet.classify.MaxEntGETrainer;
+import cc.mallet.classify.MaxEntL1Trainer;
+import cc.mallet.classify.MaxEntPRTrainer;
 import cc.mallet.classify.MaxEntTrainer;
+import cc.mallet.classify.NaiveBayes;
+import cc.mallet.classify.NaiveBayesEMTrainer;
+import cc.mallet.classify.NaiveBayesTrainer;
+import cc.mallet.optimize.LimitedMemoryBFGS;
+import cc.mallet.optimize.Optimizer;
 import cc.mallet.types.Alphabet;
 import cc.mallet.types.FeatureVector;
 import cc.mallet.types.Instance;
@@ -67,22 +76,21 @@
         weights[featureIndex] = indexer.getNumTimesEventsSeen()[contextIndex];
       }
 
-      FeatureVector fv = new FeatureVector(dataAlphabet, malletFeatures,
-          weights);
+      FeatureVector fv = new FeatureVector(dataAlphabet, malletFeatures, weights);
       Instance inst = new Instance(fv, targetAlphabet.lookupLabel(
-          indexer.getOutcomeLabels()[outcomes[contextIndex]], true), "name",
+          indexer.getOutcomeLabels()[outcomes[contextIndex]], true), "fid:" + contextIndex,
           "data-indexer");
       instances.add(inst);
     }
 
     InstanceList trainingData = new InstanceList(dataAlphabet, targetAlphabet);
-    Instance inst = instances.iterator().next();
-
-    Alphabet.alphabetsMatch(trainingData, inst);
+    
     trainingData.addAll(instances);
 
     MaxEntTrainer trainer = new MaxEntTrainer();
-    
+//    trainer.setGaussianPriorVariance(1d);
+//    trainer.setNumIterations(100);
+
     Classifier classifier = trainer.train(trainingData);
 
     return new ClassifierModel(classifier);
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
new file mode 100644
index 0000000..c39e4c3
--- /dev/null
+++ b/opennlp-brat-annotator/pom.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+     http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+
+	<groupId>org.apache.opennlp</groupId>
+	<artifactId>opennlp-brat-annotator</artifactId>
+	<version>1.0-SNAPSHOT</version>
+	<packaging>jar</packaging>
+
+	<name>opennlp-brat-annotator</name>
+
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+	</properties>
+
+	<dependencies>
+		<dependency>
+			<groupId>org.eclipse.jetty</groupId>
+			<artifactId>jetty-server</artifactId>
+			<version>9.2.3.v20140905</version>
+		</dependency>
+		
+		<dependency>
+			<groupId>org.eclipse.jetty</groupId>
+			<artifactId>jetty-servlet</artifactId>
+			<version>9.2.3.v20140905</version>
+		</dependency>
+
+		<dependency>
+			<groupId>com.sun.jersey</groupId>
+			<artifactId>jersey-servlet</artifactId>
+			<version>1.12</version>
+		</dependency>
+
+		<dependency>
+			<groupId>com.sun.jersey</groupId>
+			<artifactId>jersey-json</artifactId>
+			<version>1.12</version>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.opennlp</groupId>
+			<artifactId>opennlp-tools</artifactId>
+			<version>1.6.0</version>
+		</dependency>
+
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<version>3.8.1</version>
+			<scope>test</scope>
+		</dependency>
+	</dependencies>
+</project>
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratAnnService.java
new file mode 100644
index 0000000..c9b4d60
--- /dev/null
+++ b/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratAnnService.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.bratannotator;
+
+import java.io.File;
+import java.net.URI;
+import java.net.URL;
+
+import org.eclipse.jetty.server.Server;
+import org.eclipse.jetty.servlet.ServletContextHandler;
+import org.eclipse.jetty.servlet.ServletHolder;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinder;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.sentdetect.NewlineSentenceDetector;
+import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.SimpleTokenizer;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.tokenize.WhitespaceTokenizer;
+
+/**
+ * Command line launcher for the brat named entity annotation service.
+ *
+ * <p>{@link #main(String[])} loads a sentence detector, a tokenizer and the name finders
+ * into the static fields of this class and then serves the Jersey resources of the
+ * {@code opennlp.bratannotator} package from an embedded Jetty server on port 8080.
+ */
+public class BratAnnService {
+
+  private static final String SENTENCE_DETECTOR_PREFIX = "sentenceDetector:";
+  private static final String TOKENIZER_PREFIX = "tokenizer:";
+
+  /** Sentence detector used by the resources; assigned by {@link #main(String[])}. */
+  public static SentenceDetector sentenceDetector;
+
+  /** Tokenizer used by the resources; assigned by {@link #main(String[])}. */
+  public static Tokenizer tokenizer;
+
+  /** Name finders used by the resources, one per name finder model argument. */
+  public static TokenNameFinder nameFinders[];
+
+  /**
+   * Loads the configured components and runs the HTTP server until it stops.
+   *
+   * <p>The first argument is {@code sentenceDetector:newline} or the path of a sentence
+   * detector model file. The second argument is {@code tokenizer:whitespace},
+   * {@code tokenizer:simple} or the path of a tokenizer model file. Every further
+   * argument is the URL of a name finder model. With fewer than three arguments, or an
+   * unknown built-in component name, a message is printed and the method returns.
+   *
+   * @param args sentenceDetectorURI tokenizerURI namefinderURI_1 ... nameFinderURI_n
+   * @throws Exception if a model cannot be loaded or the server cannot be run
+   */
+  public static void main(String[] args) throws Exception {
+
+    if (args.length < 3) {
+      System.out.println("sentenceDetectorURI tokenizerURI namefinderURI_1 ... nameFinderURI_n");
+      return;
+    }
+
+    // The prefix is matched on the raw argument so that model paths which are not valid
+    // URIs (spaces, backslashes) can still be passed as plain file names.
+    if (args[0].startsWith(SENTENCE_DETECTOR_PREFIX)) {
+      String detectorName = args[0].substring(SENTENCE_DETECTOR_PREFIX.length());
+      if ("newline".equals(detectorName)) {
+        sentenceDetector = new NewlineSentenceDetector();
+      }
+      else {
+        System.out.println("unknown sentence detector");
+        return;
+      }
+    }
+    else {
+      sentenceDetector = new SentenceDetectorME(new SentenceModel(new File(args[0])));
+    }
+
+    if (args[1].startsWith(TOKENIZER_PREFIX)) {
+      String tokenizerName = args[1].substring(TOKENIZER_PREFIX.length());
+      if ("whitespace".equals(tokenizerName)) {
+        tokenizer = WhitespaceTokenizer.INSTANCE;
+      }
+      else if ("simple".equals(tokenizerName)) {
+        tokenizer = SimpleTokenizer.INSTANCE;
+      }
+      else {
+        System.out.println("unknown tokenizer");
+        return;
+      }
+    }
+    else {
+      tokenizer = new TokenizerME(new TokenizerModel(new File(args[1])));
+    }
+
+    // Load every name finder model listed after the tokenizer argument.
+    TokenNameFinder[] loadedNameFinders = new TokenNameFinder[args.length - 2];
+    for (int i = 0; i < loadedNameFinders.length; i++) {
+      loadedNameFinders[i] =
+          new NameFinderME(new TokenNameFinderModel(new URL(args[i + 2])));
+    }
+    nameFinders = loadedNameFinders;
+
+    ServletContextHandler context = new ServletContextHandler(
+        ServletContextHandler.SESSIONS);
+    context.setContextPath("/");
+
+    Server jettyServer = new Server(8080);
+    jettyServer.setHandler(context);
+
+    ServletHolder jerseyServlet = context
+        .addServlet(com.sun.jersey.spi.container.servlet.ServletContainer.class, "/*");
+    jerseyServlet.setInitParameter("com.sun.jersey.config.property.packages", "opennlp.bratannotator");
+    jerseyServlet.setInitParameter("com.sun.jersey.api.json.POJOMappingFeature", "true");
+    jerseyServlet.setInitOrder(0);
+
+    jerseyServlet.setInitParameter("jersey.config.server.provider.classnames",
+        BratNameFinderResource.class.getCanonicalName());
+
+    try {
+      jettyServer.start();
+      jettyServer.join();
+    } finally {
+      jettyServer.destroy();
+    }
+  }
+}
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratNameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratNameFinderResource.java
new file mode 100644
index 0000000..88dacc6
--- /dev/null
+++ b/opennlp-brat-annotator/src/main/java/opennlp/bratannotator/BratNameFinderResource.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.bratannotator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.QueryParam;
+import javax.ws.rs.core.MediaType;
+
+import opennlp.tools.namefind.TokenNameFinder;
+import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.util.Span;
+
+/**
+ * REST resource that runs the name finders configured in {@link BratAnnService} over a
+ * plain text document and returns the detected names with their character offsets.
+ */
+@Path("/ner")
+public class BratNameFinderResource {
+
+  /** One detected name; a name that spans line breaks has one fragment per line. */
+  public static class NameAnn {
+    /** Begin (inclusive) and end (exclusive) offsets of each fragment in the posted text. */
+    public int[][] offsets;
+    /** Covered text of each fragment, parallel to {@link #offsets}. */
+    public String[] texts;
+    /** Name type as returned by {@link Span#getType()}. */
+    public String type;
+  }
+
+  // NOTE(review): these instances are shared by all requests; OpenNLP ME components are
+  // generally not thread safe, so concurrent requests may need synchronization - confirm.
+  private SentenceDetector sentDetect = BratAnnService.sentenceDetector;
+  private Tokenizer tokenizer = BratAnnService.tokenizer;
+  private TokenNameFinder nameFinders[] = BratAnnService.nameFinders;
+
+  private static boolean isLineBreak(char c) {
+    return c == '\n' || c == '\r';
+  }
+
+  /**
+   * Returns the index of the first non-whitespace character in the range
+   * {@code [beginOffset, endOffset)} of {@code s}, or -1 if the range has none.
+   */
+  private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset,
+      int endOffset) {
+
+    for (int i = beginOffset; i < endOffset; i++) {
+      char c = s.charAt(i);
+      // isSpaceChar alone does not match '\n', '\r' or '\t'; isWhitespace does, so the
+      // second half of a "\r\n" pair and any indentation are skipped as well.
+      if (!Character.isWhitespace(c) && !Character.isSpaceChar(c)) {
+        return i;
+      }
+    }
+
+    return -1;
+  }
+
+  /**
+   * Splits the range {@code [beginOffset, endOffset)} of {@code text} at line breaks and
+   * appends each non-empty fragment to {@code textSegments} and its begin and end offsets
+   * to {@code spanSegments}. Whitespace following a line break is not part of a fragment.
+   */
+  private static void splitAtLineBreaks(String text, int beginOffset, int endOffset,
+      List<String> textSegments, List<int[]> spanSegments) {
+
+    int segmentBegin = beginOffset;
+
+    while (segmentBegin != -1 && segmentBegin < endOffset) {
+      int segmentEnd = segmentBegin;
+      while (segmentEnd < endOffset && !isLineBreak(text.charAt(segmentEnd))) {
+        segmentEnd++;
+      }
+
+      if (segmentEnd > segmentBegin) {
+        textSegments.add(text.substring(segmentBegin, segmentEnd));
+        spanSegments.add(new int[] { segmentBegin, segmentEnd });
+      }
+
+      // Continue behind the line break(s); -1 ends the loop when only whitespace is left.
+      segmentBegin = findNextNonWhitespaceChar(text, segmentEnd, endOffset);
+    }
+  }
+
+  /**
+   * Detects names in the posted text.
+   *
+   * @param modelName value of the {@code model} query parameter; currently not evaluated
+   * @param text the plain text document to annotate
+   * @return the detected names, keyed by a running index starting at "0"
+   */
+  @POST
+  @Consumes(MediaType.TEXT_PLAIN)
+  @Produces(MediaType.APPLICATION_JSON)
+  public Map<String, NameAnn> findNames(@QueryParam("model") String modelName,
+      String text) {
+
+    Span sentenceSpans[] = sentDetect.sentPosDetect(text);
+
+    Map<String, NameAnn> map = new HashMap<String, NameAnn>();
+
+    int indexCounter = 0;
+
+    try {
+      for (Span sentenceSpan : sentenceSpans) {
+
+        String sentenceText = sentenceSpan.getCoveredText(text).toString();
+
+        // Token spans are relative to the sentence; its offset is added back below.
+        Span tokenSpans[] = tokenizer.tokenizePos(sentenceText);
+        String tokens[] = Span.spansToStrings(tokenSpans, sentenceText);
+
+        for (TokenNameFinder nameFinder : nameFinders) {
+          for (Span name : nameFinder.find(tokens)) {
+
+            int beginOffset = tokenSpans[name.getStart()].getStart()
+                + sentenceSpan.getStart();
+            int endOffset = tokenSpans[name.getEnd() - 1].getEnd()
+                + sentenceSpan.getStart();
+
+            List<String> textSegments = new ArrayList<String>();
+            List<int[]> spanSegments = new ArrayList<int[]>();
+            splitAtLineBreaks(text, beginOffset, endOffset, textSegments, spanSegments);
+
+            NameAnn ann = new NameAnn();
+            ann.texts = textSegments.toArray(new String[textSegments.size()]);
+            ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]);
+            ann.type = name.getType();
+
+            map.put(Integer.toString(indexCounter++), ann);
+          }
+        }
+      }
+    } finally {
+      // Each request is a separate document: drop the context the name finders
+      // collected so that it cannot influence the next request.
+      for (TokenNameFinder nameFinder : nameFinders) {
+        nameFinder.clearAdaptiveData();
+      }
+    }
+
+    return map;
+  }
+}