OPENNLP-480 Fixed bug in offset handling.
diff --git a/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java b/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java
index 1873b08..2932568 100644
--- a/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java
+++ b/tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/NameFinderResource.java
@@ -118,7 +118,11 @@
String[][] tokenizedSentences = new String[sentenceSpans.length][];
for (int i = 0; i < sentenceSpans.length; i++) {
+ // tokenizing only the covered sentence text loses the sentence's offset in the document
Span tokenSpans[] = tokenizer.tokenizePos(sentenceSpans[i].getCoveredText(document).toString());
+ // the token spans are relative to the sentence text, so shift each by the sentence start
+ tokenSpans = offsetSpans(tokenSpans, sentenceSpans[i].getStart());
+
tokenizedSentencesSpan.add(tokenSpans);
String tokens[] = new String[tokenSpans.length];
@@ -137,4 +141,17 @@
ServiceUtil.releaseService(preprocessFactoryService);
}
}
+
+ /** Returns a new array of spans whose start and end are those of tokenSpans shifted by offset. */
+ private Span[] offsetSpans(Span[] tokenSpans, int offset) {
+
+   Span[] shifted = new Span[tokenSpans.length];
+   int next = 0;
+
+   for (Span tokenSpan : tokenSpans) {
+     int shiftedStart = tokenSpan.getStart() + offset;
+     shifted[next++] = new Span(shiftedStart, tokenSpan.getEnd() + offset);
+   }
+   return shifted;
+ }
}