Move brat annotator to opennlp.git
OPENNLP-867
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
deleted file mode 100644
index 93e3620..0000000
--- a/opennlp-brat-annotator/pom.xml
+++ /dev/null
@@ -1,88 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
- license agreements. See the NOTICE file distributed with this work for additional
- information regarding copyright ownership. The ASF licenses this file to
- you under the Apache License, Version 2.0 (the "License"); you may not use
- this file except in compliance with the License. You may obtain a copy of
- the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
- by applicable law or agreed to in writing, software distributed under the
- License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
- OF ANY KIND, either express or implied. See the License for the specific
- language governing permissions and limitations under the License. -->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
-
- <groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-brat-annotator</artifactId>
- <version>1.0-SNAPSHOT</version>
- <packaging>jar</packaging>
-
- <name>opennlp-brat-annotator</name>
-
- <properties>
- <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- </properties>
-
- <dependencies>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- <version>9.2.3.v20140905</version>
- </dependency>
-
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-servlet</artifactId>
- <version>9.2.3.v20140905</version>
- </dependency>
-
- <dependency>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-bundle</artifactId>
- <version>1.18.1</version>
- </dependency>
-
- <dependency>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-json</artifactId>
- <version>1.18.1</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-tools</artifactId>
- <version>1.6.0</version>
- </dependency>
-
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>3.8.1</version>
- <scope>test</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <artifactId>maven-assembly-plugin</artifactId>
- <configuration>
- <descriptorRefs>
- <descriptorRef>jar-with-dependencies</descriptorRef>
- </descriptorRefs>
- </configuration>
- <executions>
- <execution>
- <id>make-assembly</id>
- <phase>package</phase>
- <goals>
- <goal>single</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
-</project>
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
deleted file mode 100644
index 60d2a1b..0000000
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.bratann;
-
-import java.io.File;
-import java.util.Arrays;
-import java.util.List;
-
-import org.eclipse.jetty.server.Server;
-import org.eclipse.jetty.servlet.ServletContextHandler;
-import org.eclipse.jetty.servlet.ServletHolder;
-
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.TokenNameFinder;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.sentdetect.NewlineSentenceDetector;
-import opennlp.tools.sentdetect.SentenceDetector;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.tokenize.SimpleTokenizer;
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import opennlp.tools.tokenize.WhitespaceTokenizer;
-
-public class NameFinderAnnService {
-
- public static SentenceDetector sentenceDetector = new NewlineSentenceDetector();;
- public static Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
- public static TokenNameFinder nameFinders[];
-
- public static void main(String[] args) throws Exception {
-
- if (args.length == 0) {
- System.out.println(
- "[-serverPort port] [-tokenizerModel file] [-ruleBasedTokenizer whitespace|simple] [-sentenceDetectorModel file] "
- + "namefinderFile|nameFinderURI");
- return;
- }
-
- List<String> argList = Arrays.asList(args);
-
- int serverPort = 8080;
- int serverPortIndex = argList.indexOf("-serverPort") + 1;
-
- if (serverPortIndex > 0 && serverPortIndex < args.length) {
- serverPort = Integer.parseInt(args[serverPortIndex]);
- }
-
- int sentenceModelIndex = argList.indexOf("-sentenceDetectorModel")
- + 1;
- if (sentenceModelIndex > 0 && sentenceModelIndex < args.length) {
- sentenceDetector = new SentenceDetectorME(
- new SentenceModel(new File(args[sentenceModelIndex])));
- }
-
- int ruleBasedTokenizerIndex = argList.indexOf("-ruleBasedTokenizer") + 1;
-
- if (ruleBasedTokenizerIndex > 0 && ruleBasedTokenizerIndex < args.length) {
- if ("whitespace".equals(args[ruleBasedTokenizerIndex])) {
- tokenizer = WhitespaceTokenizer.INSTANCE;
- } else if ("simple".equals(args[ruleBasedTokenizerIndex])) {
- tokenizer = SimpleTokenizer.INSTANCE;
- } else {
- System.out
- .println("unkown tokenizer: " + args[ruleBasedTokenizerIndex]);
- return;
- }
- }
-
- int tokenizerModelIndex = argList.indexOf("-tokenizerModel") + 1;
- if (tokenizerModelIndex > 0 && tokenizerModelIndex < args.length) {
- tokenizer = new TokenizerME(
- new TokenizerModel(new File(args[tokenizerModelIndex])));
- }
-
- nameFinders = new TokenNameFinder[] { new NameFinderME(
- new TokenNameFinderModel(new File(args[args.length - 1]))) };
-
- ServletContextHandler context = new ServletContextHandler(
- ServletContextHandler.SESSIONS);
- context.setContextPath("/");
-
- Server jettyServer = new Server(serverPort);
- jettyServer.setHandler(context);
-
- ServletHolder jerseyServlet = context
- .addServlet(com.sun.jersey.spi.container.servlet.ServletContainer.class, "/*");
- jerseyServlet.setInitParameter("com.sun.jersey.config.property.packages",
- "opennlp.bratann");
- jerseyServlet.setInitParameter("com.sun.jersey.api.json.POJOMappingFeature", "true");
- jerseyServlet.setInitOrder(0);
-
- jerseyServlet.setInitParameter("jersey.config.server.provider.classnames",
- NameFinderResource.class.getCanonicalName());
-
- try {
- jettyServer.start();
- jettyServer.join();
- } finally {
- jettyServer.destroy();
- }
- }
-}
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
deleted file mode 100644
index 39cec0e..0000000
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.bratann;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import javax.ws.rs.Consumes;
-import javax.ws.rs.POST;
-import javax.ws.rs.Path;
-import javax.ws.rs.Produces;
-import javax.ws.rs.QueryParam;
-import javax.ws.rs.core.MediaType;
-
-import opennlp.tools.namefind.TokenNameFinder;
-import opennlp.tools.sentdetect.SentenceDetector;
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.util.Span;
-
-@Path("/ner")
-public class NameFinderResource {
-
- public static class NameAnn {
- public int[][] offsets;
- public String[] texts;
- public String type;
- }
-
- private SentenceDetector sentDetect = NameFinderAnnService.sentenceDetector;
- private Tokenizer tokenizer = NameFinderAnnService.tokenizer;
- private TokenNameFinder nameFinders[] = NameFinderAnnService.nameFinders;
-
- private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset,
- int endOffset) {
-
- for (int i = beginOffset; i < endOffset; i++) {
- if (!Character.isSpaceChar(s.charAt(i))) {
- return i;
- }
- }
-
- return -1;
- }
-
- @POST
- @Consumes(MediaType.TEXT_PLAIN)
- @Produces(MediaType.APPLICATION_JSON)
- public Map<String, NameAnn> findNames(@QueryParam("model") String modelName,
- String text) {
-
- Span sentenceSpans[] = sentDetect.sentPosDetect(text);
-
- Map<String, NameAnn> map = new HashMap<String, NameAnn>();
-
- int indexCounter = 0;
-
- for (int i = 0; i < sentenceSpans.length; i++) {
-
- String sentenceText = sentenceSpans[i].getCoveredText(text).toString();
-
- // offset of sentence gets lost here!
- Span tokenSpans[] = tokenizer
- .tokenizePos(sentenceText);
-
- String tokens[] = Span.spansToStrings(tokenSpans, sentenceText);
-
- for (TokenNameFinder nameFinder : nameFinders) {
- Span names[] = nameFinder.find(tokens);
-
- for (Span name : names) {
-
- int beginOffset = tokenSpans[name.getStart()].getStart()
- + sentenceSpans[i].getStart();
- int endOffset = tokenSpans[name.getEnd() - 1].getEnd()
- + sentenceSpans[i].getStart();
-
- // create a list of new line indexes
- List<Integer> newLineIndexes = new ArrayList<Integer>();
-
- // TODO: Code needs to handle case that there are multiple new lines
- // in a row
-
- boolean inNewLineSequence = false;
- for (int ci = beginOffset; ci < endOffset; ci++) {
- if (text.charAt(ci) == '\n' || text.charAt(ci) == '\r') {
- if (!inNewLineSequence) {
- newLineIndexes.add(ci);
- }
- inNewLineSequence = true;
- } else {
- inNewLineSequence = false;
- }
- }
-
- List<String> textSegments = new ArrayList<String>();
- List<int[]> spanSegments = new ArrayList<int[]>();
-
- int segmentBegin = beginOffset;
-
- for (int newLineOffset : newLineIndexes) {
- // create segment from begin to offset
- textSegments.add(text.substring(segmentBegin, newLineOffset));
- spanSegments.add(new int[] { segmentBegin, newLineOffset });
-
- segmentBegin = findNextNonWhitespaceChar(text, newLineOffset + 1,
- endOffset);
-
- if (segmentBegin == -1) {
- break;
- }
- }
-
- // create left over segment
- if (segmentBegin != -1) {
- textSegments.add(text.substring(segmentBegin, endOffset));
- spanSegments.add(new int[] { segmentBegin, endOffset });
- }
-
- NameAnn ann = new NameAnn();
- ann.texts = textSegments.toArray(new String[textSegments.size()]);
- ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]);
- ann.type = name.getType();
-
- map.put(Integer.toString(indexCounter++), ann);
- }
- }
- }
-
- return map;
- }
-}