| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package opennlp.bratannotator; |
| |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| import javax.ws.rs.Consumes; |
| import javax.ws.rs.POST; |
| import javax.ws.rs.Path; |
| import javax.ws.rs.Produces; |
| import javax.ws.rs.QueryParam; |
| import javax.ws.rs.core.MediaType; |
| |
| import opennlp.tools.namefind.TokenNameFinder; |
| import opennlp.tools.sentdetect.SentenceDetector; |
| import opennlp.tools.tokenize.Tokenizer; |
| import opennlp.tools.util.Span; |
| |
| @Path("/ner") |
| public class BratNameFinderResource { |
| |
| public static class NameAnn { |
| public int[][] offsets; |
| public String[] texts; |
| public String type; |
| } |
| |
| private SentenceDetector sentDetect = BratAnnService.sentenceDetector; |
| private Tokenizer tokenizer = BratAnnService.tokenizer; |
| private TokenNameFinder nameFinders[] = BratAnnService.nameFinders; |
| |
| private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset, |
| int endOffset) { |
| |
| for (int i = beginOffset; i < endOffset; i++) { |
| if (!Character.isSpaceChar(s.charAt(i))) { |
| return i; |
| } |
| } |
| |
| return -1; |
| } |
| |
| @POST |
| @Consumes(MediaType.TEXT_PLAIN) |
| @Produces(MediaType.APPLICATION_JSON) |
| public Map<String, NameAnn> findNames(@QueryParam("model") String modelName, |
| String text) { |
| |
| Span sentenceSpans[] = sentDetect.sentPosDetect(text); |
| |
| Map<String, NameAnn> map = new HashMap<String, NameAnn>(); |
| |
| int indexCounter = 0; |
| |
| for (int i = 0; i < sentenceSpans.length; i++) { |
| |
| String sentenceText = sentenceSpans[i].getCoveredText(text).toString(); |
| |
| // offset of sentence gets lost here! |
| Span tokenSpans[] = tokenizer |
| .tokenizePos(sentenceText); |
| |
| String tokens[] = Span.spansToStrings(tokenSpans, sentenceText); |
| |
| for (TokenNameFinder nameFinder : nameFinders) { |
| Span names[] = nameFinder.find(tokens); |
| |
| for (Span name : names) { |
| |
| int beginOffset = tokenSpans[name.getStart()].getStart() |
| + sentenceSpans[i].getStart(); |
| int endOffset = tokenSpans[name.getEnd() - 1].getEnd() |
| + sentenceSpans[i].getStart(); |
| |
| // create a list of new line indexes |
| List<Integer> newLineIndexes = new ArrayList<Integer>(); |
| |
| // TODO: Code needs to handle case that there are multiple new lines |
| // in a row |
| |
| boolean inNewLineSequence = false; |
| for (int ci = beginOffset; ci < endOffset; ci++) { |
| if (text.charAt(ci) == '\n' || text.charAt(ci) == '\r') { |
| if (!inNewLineSequence) { |
| newLineIndexes.add(ci); |
| } |
| inNewLineSequence = true; |
| } else { |
| inNewLineSequence = false; |
| } |
| } |
| |
| List<String> textSegments = new ArrayList<String>(); |
| List<int[]> spanSegments = new ArrayList<int[]>(); |
| |
| int segmentBegin = beginOffset; |
| |
| for (int newLineOffset : newLineIndexes) { |
| // create segment from begin to offset |
| textSegments.add(text.substring(segmentBegin, newLineOffset)); |
| spanSegments.add(new int[] { segmentBegin, newLineOffset }); |
| |
| segmentBegin = findNextNonWhitespaceChar(text, newLineOffset + 1, |
| endOffset); |
| |
| if (segmentBegin == -1) { |
| break; |
| } |
| } |
| |
| // create left over segment |
| if (segmentBegin != -1) { |
| textSegments.add(text.substring(segmentBegin, endOffset)); |
| spanSegments.add(new int[] { segmentBegin, endOffset }); |
| } |
| |
| NameAnn ann = new NameAnn(); |
| ann.texts = textSegments.toArray(new String[textSegments.size()]); |
| ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]); |
| ann.type = name.getType(); |
| |
| map.put(Integer.toString(indexCounter++), ann); |
| } |
| } |
| } |
| |
| return map; |
| } |
| } |