Move brat annotator to opennlp.git OPENNLP-867

commit: 4350f64c009141bd80234113c39d30c61fa47020 [log] [tgz]
author: Jörn Kottmann <joern@apache.org> Wed Oct 19 23:42:13 2016 +0200
committer: Jörn Kottmann <joern@apache.org> Wed Oct 19 23:42:13 2016 +0200
tree: 1c10f6d8699a6b7ad407aa42f995a03199df3018
parent: dce84c0a6cab4caeb1e804f17ca35bb94bb73b8e [diff]
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
deleted file mode 100644
index 93e3620..0000000
--- a/opennlp-brat-annotator/pom.xml
+++ /dev/null

@@ -1,88 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
-	license agreements. See the NOTICE file distributed with this work for additional 
-	information regarding copyright ownership. The ASF licenses this file to 
-	you under the Apache License, Version 2.0 (the "License"); you may not use 
-	this file except in compliance with the License. You may obtain a copy of 
-	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
-	by applicable law or agreed to in writing, software distributed under the 
-	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
-	OF ANY KIND, either express or implied. See the License for the specific 
-	language governing permissions and limitations under the License. -->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-	<modelVersion>4.0.0</modelVersion>
-
-	<groupId>org.apache.opennlp</groupId>
-	<artifactId>opennlp-brat-annotator</artifactId>
-	<version>1.0-SNAPSHOT</version>
-	<packaging>jar</packaging>
-
-	<name>opennlp-brat-annotator</name>
-
-	<properties>
-		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-	</properties>
-
-	<dependencies>
-		<dependency>
-			<groupId>org.eclipse.jetty</groupId>
-			<artifactId>jetty-server</artifactId>
-			<version>9.2.3.v20140905</version>
-		</dependency>
-
-		<dependency>
-			<groupId>org.eclipse.jetty</groupId>
-			<artifactId>jetty-servlet</artifactId>
-			<version>9.2.3.v20140905</version>
-		</dependency>
-
-		<dependency>
-			<groupId>com.sun.jersey</groupId>
-			<artifactId>jersey-bundle</artifactId>
-			<version>1.18.1</version>
-		</dependency>
-
-		<dependency>
-			<groupId>com.sun.jersey</groupId>
-			<artifactId>jersey-json</artifactId>
-			<version>1.18.1</version>
-		</dependency>
-
-		<dependency>
-			<groupId>org.apache.opennlp</groupId>
-			<artifactId>opennlp-tools</artifactId>
-			<version>1.6.0</version>
-		</dependency>
-
-		<dependency>
-			<groupId>junit</groupId>
-			<artifactId>junit</artifactId>
-			<version>3.8.1</version>
-			<scope>test</scope>
-		</dependency>
-	</dependencies>
-	<build>
-		<plugins>
-			<plugin>
-				<artifactId>maven-assembly-plugin</artifactId>
-				<configuration>
-					<descriptorRefs>
-						<descriptorRef>jar-with-dependencies</descriptorRef>
-					</descriptorRefs>
-				</configuration>
-				<executions>
-					<execution>
-						<id>make-assembly</id>
-						<phase>package</phase>
-						<goals>
-							<goal>single</goal>
-						</goals>
-					</execution>
-				</executions>
-			</plugin>
-		</plugins>
-	</build>
-</project>

diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
deleted file mode 100644
index 60d2a1b..0000000
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
+++ /dev/null

@@ -1,119 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.bratann;
-
-import java.io.File;
-import java.util.Arrays;
-import java.util.List;
-
-import org.eclipse.jetty.server.Server;
-import org.eclipse.jetty.servlet.ServletContextHandler;
-import org.eclipse.jetty.servlet.ServletHolder;
-
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.TokenNameFinder;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.sentdetect.NewlineSentenceDetector;
-import opennlp.tools.sentdetect.SentenceDetector;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.tokenize.SimpleTokenizer;
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import opennlp.tools.tokenize.WhitespaceTokenizer;
-
-public class NameFinderAnnService {
-
-  public static SentenceDetector sentenceDetector = new NewlineSentenceDetector();;
-  public static Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
-  public static TokenNameFinder nameFinders[];
-
-  public static void main(String[] args) throws Exception {
-
-    if (args.length == 0) {
-      System.out.println(
-          "[-serverPort port] [-tokenizerModel file] [-ruleBasedTokenizer whitespace|simple] [-sentenceDetectorModel file] "
-              + "namefinderFile|nameFinderURI");
-      return;
-    }
-
-    List<String> argList = Arrays.asList(args);
-
-    int serverPort = 8080;
-    int serverPortIndex = argList.indexOf("-serverPort") + 1;
-
-    if (serverPortIndex > 0 && serverPortIndex < args.length) {
-      serverPort = Integer.parseInt(args[serverPortIndex]);
-    }
-
-    int sentenceModelIndex = argList.indexOf("-sentenceDetectorModel")
-        + 1;
-    if (sentenceModelIndex > 0 && sentenceModelIndex < args.length) {
-      sentenceDetector = new SentenceDetectorME(
-          new SentenceModel(new File(args[sentenceModelIndex])));
-    }
-
-    int ruleBasedTokenizerIndex = argList.indexOf("-ruleBasedTokenizer") + 1;
-
-    if (ruleBasedTokenizerIndex > 0 && ruleBasedTokenizerIndex < args.length) {
-      if ("whitespace".equals(args[ruleBasedTokenizerIndex])) {
-        tokenizer = WhitespaceTokenizer.INSTANCE;
-      } else if ("simple".equals(args[ruleBasedTokenizerIndex])) {
-        tokenizer = SimpleTokenizer.INSTANCE;
-      } else {
-        System.out
-        .println("unkown tokenizer: " + args[ruleBasedTokenizerIndex]);
-        return;
-      }
-    }
-
-    int tokenizerModelIndex = argList.indexOf("-tokenizerModel") + 1;
-    if (tokenizerModelIndex > 0 && tokenizerModelIndex < args.length) {
-      tokenizer = new TokenizerME(
-          new TokenizerModel(new File(args[tokenizerModelIndex])));
-    }
-
-    nameFinders = new TokenNameFinder[] { new NameFinderME(
-        new TokenNameFinderModel(new File(args[args.length - 1]))) };
-
-    ServletContextHandler context = new ServletContextHandler(
-        ServletContextHandler.SESSIONS);
-    context.setContextPath("/");
-
-    Server jettyServer = new Server(serverPort);
-    jettyServer.setHandler(context);
-
-    ServletHolder jerseyServlet = context
-        .addServlet(com.sun.jersey.spi.container.servlet.ServletContainer.class, "/*");
-    jerseyServlet.setInitParameter("com.sun.jersey.config.property.packages",
-        "opennlp.bratann");
-    jerseyServlet.setInitParameter("com.sun.jersey.api.json.POJOMappingFeature", "true");
-    jerseyServlet.setInitOrder(0);
-
-    jerseyServlet.setInitParameter("jersey.config.server.provider.classnames",
-        NameFinderResource.class.getCanonicalName());
-
-    try {
-      jettyServer.start();
-      jettyServer.join();
-    } finally {
-      jettyServer.destroy();
-    }
-  }
-}

diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
deleted file mode 100644
index 39cec0e..0000000
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
+++ /dev/null

@@ -1,148 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.bratann;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import javax.ws.rs.Consumes;
-import javax.ws.rs.POST;
-import javax.ws.rs.Path;
-import javax.ws.rs.Produces;
-import javax.ws.rs.QueryParam;
-import javax.ws.rs.core.MediaType;
-
-import opennlp.tools.namefind.TokenNameFinder;
-import opennlp.tools.sentdetect.SentenceDetector;
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.util.Span;
-
-@Path("/ner")
-public class NameFinderResource {
-
-  public static class NameAnn {
-    public int[][] offsets;
-    public String[] texts;
-    public String type;
-  }
-
-  private SentenceDetector sentDetect = NameFinderAnnService.sentenceDetector;
-  private Tokenizer tokenizer = NameFinderAnnService.tokenizer;
-  private TokenNameFinder nameFinders[] = NameFinderAnnService.nameFinders;
-
-  private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset,
-      int endOffset) {
-
-    for (int i = beginOffset; i < endOffset; i++) {
-      if (!Character.isSpaceChar(s.charAt(i))) {
-        return i;
-      }
-    }
-
-    return -1;
-  }
-
-  @POST
-  @Consumes(MediaType.TEXT_PLAIN)
-  @Produces(MediaType.APPLICATION_JSON)
-  public Map<String, NameAnn> findNames(@QueryParam("model") String modelName,
-      String text) {
-
-    Span sentenceSpans[] = sentDetect.sentPosDetect(text);
-
-    Map<String, NameAnn> map = new HashMap<String, NameAnn>();
-
-    int indexCounter = 0;
-
-    for (int i = 0; i < sentenceSpans.length; i++) {
-      
-      String sentenceText = sentenceSpans[i].getCoveredText(text).toString();
-      
-      // offset of sentence gets lost here!
-      Span tokenSpans[] = tokenizer
-          .tokenizePos(sentenceText);
-
-      String tokens[] = Span.spansToStrings(tokenSpans, sentenceText);
-
-      for (TokenNameFinder nameFinder : nameFinders) {
-        Span names[] = nameFinder.find(tokens);
-
-        for (Span name : names) {
-          
-          int beginOffset = tokenSpans[name.getStart()].getStart()
-              + sentenceSpans[i].getStart();
-          int endOffset = tokenSpans[name.getEnd() - 1].getEnd()
-              + sentenceSpans[i].getStart();
-
-          // create a list of new line indexes
-          List<Integer> newLineIndexes = new ArrayList<Integer>();
-
-          // TODO: Code needs to handle case that there are multiple new lines
-          // in a row
-
-          boolean inNewLineSequence = false;
-          for (int ci = beginOffset; ci < endOffset; ci++) {
-            if (text.charAt(ci) == '\n' || text.charAt(ci) == '\r') {
-              if (!inNewLineSequence) {
-                newLineIndexes.add(ci);
-              }
-              inNewLineSequence = true;
-            } else {
-              inNewLineSequence = false;
-            }
-          }
-
-          List<String> textSegments = new ArrayList<String>();
-          List<int[]> spanSegments = new ArrayList<int[]>();
-
-          int segmentBegin = beginOffset;
-
-          for (int newLineOffset : newLineIndexes) {
-            // create segment from begin to offset
-            textSegments.add(text.substring(segmentBegin, newLineOffset));
-            spanSegments.add(new int[] { segmentBegin, newLineOffset });
-
-            segmentBegin = findNextNonWhitespaceChar(text, newLineOffset + 1,
-                endOffset);
-
-            if (segmentBegin == -1) {
-              break;
-            }
-          }
-
-          // create left over segment
-          if (segmentBegin != -1) {
-            textSegments.add(text.substring(segmentBegin, endOffset));
-            spanSegments.add(new int[] { segmentBegin, endOffset });
-          }
-
-          NameAnn ann = new NameAnn();
-          ann.texts = textSegments.toArray(new String[textSegments.size()]);
-          ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]);
-          ann.type = name.getType();
-
-          map.put(Integer.toString(indexCounter++), ann);
-        }
-      }
-    }
-
-    return map;
-  }
-}
commit	4350f64c009141bd80234113c39d30c61fa47020	[log] [tgz]
author	Jörn Kottmann <joern@apache.org>	Wed Oct 19 23:42:13 2016 +0200
committer	Jörn Kottmann <joern@apache.org>	Wed Oct 19 23:42:13 2016 +0200
tree	1c10f6d8699a6b7ad407aa42f995a03199df3018
parent	dce84c0a6cab4caeb1e804f17ca35bb94bb73b8e [diff]