sanitize some TODOs and unhealthy code (#89)
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
index 725a213..d3566e1 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
@@ -23,7 +23,6 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
-import java.util.Stack;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.chunking.Parser;
@@ -56,10 +55,12 @@
// Should we just maintain a parse id map !?
}
+ @Override
public int getSentenceNumber() {
return sentenceNumber;
}
+ @Override
public List<opennlp.tools.coref.mention.Parse> getNamedEntities() {
List<Parse> names = new ArrayList<>();
List<Parse> kids = new LinkedList<>(Arrays.asList(parse.getChildren()));
@@ -75,10 +76,12 @@
return createParses(names.toArray(new Parse[names.size()]));
}
+ @Override
public List<opennlp.tools.coref.mention.Parse> getChildren() {
return createParses(parse.getChildren());
}
+ @Override
public List<opennlp.tools.coref.mention.Parse> getSyntacticChildren() {
List<Parse> kids = new ArrayList<>(Arrays.asList(parse.getChildren()));
for (int ci = 0; ci < kids.size(); ci++) {
@@ -92,6 +95,7 @@
return createParses(kids.toArray(new Parse[kids.size()]));
}
+ @Override
public List<opennlp.tools.coref.mention.Parse> getTokens() {
List<Parse> tokens = new ArrayList<>();
List<Parse> kids = new LinkedList<>(Arrays.asList(parse.getChildren()));
@@ -107,6 +111,7 @@
return createParses(tokens.toArray(new Parse[tokens.size()]));
}
+ @Override
public String getSyntacticType() {
if (ENTITY_SET.contains(parse.getType())) {
return null;
@@ -129,6 +134,7 @@
return newParses;
}
+ @Override
public String getEntityType() {
if (ENTITY_SET.contains(parse.getType())) {
return parse.getType();
@@ -138,6 +144,7 @@
}
}
+ @Override
public boolean isParentNAC() {
Parse parent = parse.getParent();
while (parent != null) {
@@ -149,6 +156,7 @@
return false;
}
+ @Override
public opennlp.tools.coref.mention.Parse getParent() {
Parse parent = parse.getParent();
if (parent == null) {
@@ -159,32 +167,32 @@
}
}
+ @Override
public boolean isNamedEntity() {
// TODO: We should use here a special tag to, where
// the type can be extracted from. Then it just depends
// on the training data and not the values inside NAME_TYPES.
-
- if (ENTITY_SET.contains(parse.getType())) {
- return true;
- }
- else {
- return false;
- }
+
+ return ENTITY_SET.contains(parse.getType());
}
+ @Override
public boolean isNounPhrase() {
return parse.getType().equals("NP") || parse.getType().startsWith("NP#");
}
+ @Override
public boolean isSentence() {
return parse.getType().equals(Parser.TOP_NODE);
}
+ @Override
public boolean isToken() {
return parse.isPosTag();
}
+ @Override
public int getEntityId() {
String type = parse.getType();
@@ -198,16 +206,17 @@
}
}
+ @Override
public Span getSpan() {
return parse.getSpan();
}
+ @Override
public int compareTo(opennlp.tools.coref.mention.Parse p) {
if (p == this) {
return 0;
}
-
if (getSentenceNumber() < p.getSentenceNumber()) {
return -1;
}
@@ -221,11 +230,6 @@
System.out.println("Maybe incorrect measurement!");
- Stack<Parse> parents = new Stack<>();
-
-
-
-
// get parent and update distance
// if match return distance
// if not match do it again
@@ -241,6 +245,7 @@
}
+ @Override
public opennlp.tools.coref.mention.Parse getPreviousToken() {
Parse parent = parse.getParent();
Parse node = parse;
@@ -267,6 +272,7 @@
}
}
+ @Override
public opennlp.tools.coref.mention.Parse getNextToken() {
Parse parent = parse.getParent();
Parse node = parse;
diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java
index d715871..9f5a9d0 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java
@@ -73,6 +73,7 @@
super(Parameters.class);
}
+ @Override
public ObjectStream<CorefSample> create(String[] args) {
Parameters params = ArgumentParser.parse(args, Parameters.class);
@@ -85,7 +86,7 @@
ObjectStream<String> mucDocStream = new FileToStringSampleStream(
new DirectorySampleStream(params.getData(), new FileFilter() {
-
+ @Override
public boolean accept(File file) {
return file.getName().toLowerCase().endsWith(".sgm");
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java
index e9e0bc4..4e24777 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java
@@ -43,6 +43,7 @@
this.tags = tags;
}
+ @Override
public RawCorefSample read() throws IOException {
RawCorefSample sample = samples.read();
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java
index 76ae8ed..cd7c818 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java
@@ -20,25 +20,17 @@
import java.util.Arrays;
import java.util.List;
-public class JSMLearnerOnLatticeWithAbduction extends JSMLearnerOnLatticeWithDeduction{
+public class JSMLearnerOnLatticeWithAbduction extends JSMLearnerOnLatticeWithDeduction {
-
-
-
+ @Override
public JSMDecision buildLearningModel(List<String> posTexts, List<String> negTexts,
String unknown, String[] separationKeywords){
- JSMDecision decision = super.buildLearningModel(posTexts, negTexts, unknown, separationKeywords);
- // verify each hypothesis
- //TODO
- return decision;
-
+ //TODO verify each hypothesis
+ return super.buildLearningModel(posTexts, negTexts, unknown, separationKeywords);
}
-
-
public static void main (String[] args) {
-
String[] posArr = new String[] {"I rent an office space. This office is for my business. I can deduct office rental expense from my business profit to calculate net income. ",
"To run my business, I have to rent an office. The net business profit is calculated as follows. Rental expense needs to be subtracted from revenue. ",
"To store goods for my retail business I rent some space. When I calculate the net income, I take revenue and subtract business expenses such as office rent. ",
@@ -60,10 +52,5 @@
// Finally, do prediction
JSMDecision dec = // may be determined by ...
jsm.buildLearningModel(Arrays.asList(posArr), Arrays.asList(negArr), unknown , new String[]{"property"});
-
-
-
-
-
}
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
index b71d0b2..00a6d33 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
@@ -93,8 +93,7 @@
for (HitBase item : searchResult) { // got some text from .html
if (item.getAbstractText() != null
&& !(item.getUrl().indexOf(".pdf") > 0)) { // exclude pdf
- opinionSentencesToAdd
- .add(buildParagraphOfGeneratedText(item, sentence, null));
+ opinionSentencesToAdd.add(buildParagraphOfGeneratedText(item, sentence, null));
}
}
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
index 4389ab6..0575bbd 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
@@ -21,6 +21,7 @@
import java.util.Comparator;
import java.util.List;
import java.util.logging.Logger;
+import java.util.regex.Pattern;
import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;
import opennlp.tools.similarity.apps.utils.Utils;
@@ -30,15 +31,16 @@
import org.apache.commons.lang.StringUtils;
-/*
- * This class supports content generation by static functions
- *
+/**
+ * Provides static helper functions that support content generation.
*/
-
public class ContentGeneratorSupport {
private static final Logger LOG = Logger
.getLogger("opennlp.tools.similarity.apps.ContentGeneratorSupport");
+ // Matches a lower-case letter, 2-3 whitespace characters, then an upper-case letter. TODO: verify regexp!
+ private static final Pattern SPACES_PATTERN = Pattern.compile("([a-z])(\\s{2,3})([A-Z])");
+
/**
* Takes a sentence and extracts noun phrases and entity names to from search
* queries for finding relevant sentences on the web, which are then subject
@@ -50,10 +52,7 @@
* @return List<String> of search expressions
*/
public static List<String> buildSearchEngineQueryFromSentence(String sentence) {
- ParseTreeChunk matcher = new ParseTreeChunk();
- ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor
- .getInstance();
- List<List<ParseTreeChunk>> sent1GrpLst = null;
+ ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor.getInstance();
List<ParseTreeChunk> nPhrases = pos
.formGroupedPhrasesFromChunksForSentence(sentence).get(0);
@@ -135,10 +134,11 @@
public static String cleanSpacesInCleanedHTMLpage(String pageContent){ //was 4 spaces
//was 3 spaces => now back to 2
- //TODO - verify regexp!!
- pageContent = pageContent.trim().replaceAll("([a-z])(\\s{2,3})([A-Z])", "$1. $3")
- .replace("..", ".").replace(". . .", " ").
- replace(". .",". ").trim(); // sometimes html breaks are converted into ' ' (two spaces), so
+ pageContent = pageContent.trim();
+ pageContent = SPACES_PATTERN.matcher(pageContent).replaceAll("$1. $3")
+ .replace("..", ".").replace(". . .", " ")
+ .replace(". .",". ").trim();
+ // sometimes HTML breaks are converted into two spaces, so
// we need to put '.'
return pageContent;
}
@@ -209,12 +209,11 @@
for (Fragment f2 : fragmList2) {
String sf1 = f1.getResultText();
String sf2 = f2.getResultText();
- if (StringUtils.isEmpty(sf1) || StringUtils.isEmpty(sf1))
+ if (StringUtils.isEmpty(sf1) || StringUtils.isEmpty(sf2))
continue;
if (meas.measureStringDistance(sf1, sf2) > dupeThresh) {
fragmList2Results.remove(f2);
- LOG.info("Removed duplicates from formed fragments list: "
- + sf2);
+ LOG.info("Removed duplicates from formed fragments list: " + sf2);
}
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
index 45bcbdb..80f02ed 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
@@ -311,8 +311,7 @@
* @param hits List<HitBase> of search results objects
* @return List<String> of search results objects where dupes are removed
*/
- public static List<HitBase> removeDuplicatesFromResultantHits(
- List<HitBase> hits) {
+ public static List<HitBase> removeDuplicatesFromResultantHits(List<HitBase> hits) {
StringDistanceMeasurer meas = new StringDistanceMeasurer();
double dupeThresh = // 0.8; // if more similar, then considered dupes was
0.7;
@@ -447,7 +446,7 @@
// or get original snippet
pageSentence = fragment;
if (pageSentence != null)
- pageSentence.replace("_should_find_orig_", "");
+ pageSentence = pageSentence.replace("_should_find_orig_", "");
// resultant sentence SHOULD NOT be longer than for times the size of
// snippet fragment
@@ -463,9 +462,7 @@
+ " " + title, originalSentence);
List<List<ParseTreeChunk>> match = matchRes.getMatchResult();
if (!matchRes.isVerbExists() || matchRes.isImperativeVerb()) {
- System.out
- .println("Rejected Sentence : No verb OR Yes imperative verb :"
- + pageSentence);
+ System.out.println("Rejected Sentence : No verb OR Yes imperative verb :" + pageSentence);
continue;
}
@@ -520,12 +517,9 @@
+ "| with title= " + title);
System.out.println("For fragment = " + fragment);
} else
- System.out
- .println("Rejected sentence due to wrong area at webpage: "
- + pageSentence);
+ System.out.println("Rejected sentence due to wrong area at webpage: " + pageSentence);
} else
- System.out.println("Rejected sentence due to low score: "
- + pageSentence);
+ System.out.println("Rejected sentence due to low score: " + pageSentence);
// }
} catch (Throwable t) {
t.printStackTrace();
@@ -902,63 +896,58 @@
t.printStackTrace();
}
- return result;
-}
-
-public HitBase buildParagraphOfGeneratedText(HitBase item,
- String originalSentence, List<String> sentsAll) {
- List<Fragment> results = new ArrayList<>() ;
-
- Triple<List<String>, String, String[]> fragmentExtractionResults = formCandidateFragmentsForPage(item, originalSentence, sentsAll);
-
- List<String> allFragms = fragmentExtractionResults.getFirst();
-
- for (String fragment : allFragms) {
- String[] candidateSentences = formCandidateSentences(fragment, fragmentExtractionResults);
- if (candidateSentences == null)
- continue;
- Fragment res = verifyCandidateSentencesAndFormParagraph(candidateSentences, item, fragment, originalSentence, sentsAll);
- if (res!=null)
- results.add(res);
-
- }
-
- item.setFragments(results );
- return item;
-}
-
-
-
-
-public static void main(String[] args) {
- RelatedSentenceFinder f = new RelatedSentenceFinder();
-
- List<HitBase> hits;
- try {
- // uncomment the sentence you would like to serve as a seed sentence for
- // content generation for an event description
-
- // uncomment the sentence you would like to serve as a seed sentence for
- // content generation for an event description
- hits = f.generateContentAbout("Albert Einstein"
- // "Britney Spears - The Femme Fatale Tour"
- // "Rush Time Machine",
- // "Blue Man Group" ,
- // "Belly Dance With Zaharah",
- // "Hollander Musicology Lecture: Danielle Fosler-Lussier, Guest Lecturer",
- // "Jazz Master and arguably the most famous jazz musician alive, trumpeter Wynton Marsalis",
- );
- System.out.println(HitBase.toString(hits));
- System.out.println(HitBase.toResultantString(hits));
- // WordFileGenerator.createWordDoc("Essey about Albert Einstein",
- // hits.get(0).getTitle(), hits);
-
- } catch (Exception e) {
- e.printStackTrace();
+ return result;
}
-}
+ public HitBase buildParagraphOfGeneratedText(HitBase item,
+ String originalSentence, List<String> sentsAll) {
+ List<Fragment> results = new ArrayList<>() ;
+
+ Triple<List<String>, String, String[]> fragmentExtractionResults = formCandidateFragmentsForPage(item, originalSentence, sentsAll);
+
+ List<String> allFragms = fragmentExtractionResults.getFirst();
+
+ for (String fragment : allFragms) {
+ String[] candidateSentences = formCandidateSentences(fragment, fragmentExtractionResults);
+ if (candidateSentences == null)
+ continue;
+ Fragment res = verifyCandidateSentencesAndFormParagraph(candidateSentences, item, fragment, originalSentence, sentsAll);
+ if (res!=null)
+ results.add(res);
+
+ }
+ item.setFragments(results);
+ return item;
+ }
+ public static void main(String[] args) {
+ RelatedSentenceFinder f = new RelatedSentenceFinder();
+
+ List<HitBase> hits;
+ try {
+ // uncomment the sentence you would like to serve as a seed sentence for
+ // content generation for an event description
+
+ // uncomment the sentence you would like to serve as a seed sentence for
+ // content generation for an event description
+ hits = f.generateContentAbout("Albert Einstein"
+ // "Britney Spears - The Femme Fatale Tour"
+ // "Rush Time Machine",
+ // "Blue Man Group" ,
+ // "Belly Dance With Zaharah",
+ // "Hollander Musicology Lecture: Danielle Fosler-Lussier, Guest Lecturer",
+ // "Jazz Master and arguably the most famous jazz musician alive, trumpeter Wynton Marsalis",
+ );
+ System.out.println(HitBase.toString(hits));
+ System.out.println(HitBase.toResultantString(hits));
+ // WordFileGenerator.createWordDoc("Essey about Albert Einstein",
+ // hits.get(0).getTitle(), hits);
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ }
}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
index a075bc2..dbc93f5 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
@@ -20,7 +20,6 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import java.util.logging.Logger;
import opennlp.tools.similarity.apps.utils.Utils;
import opennlp.tools.textsimilarity.TextProcessor;
@@ -29,8 +28,6 @@
* This class does content generation in ES, DE etc
*/
public class RelatedSentenceFinderML extends RelatedSentenceFinder{
- private static final Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.RelatedSentenceFinderML");
-
public RelatedSentenceFinderML(int ms, int msr, float thresh, String key) {
this.MAX_STEPS = ms;
@@ -45,7 +42,6 @@
public List<HitBase> generateContentAbout(String sentence) throws Exception {
List<HitBase> opinionSentencesToAdd = new ArrayList<>();
System.out.println(" \n=== Entity to write about = " + sentence);
- List<String> nounPhraseQueries = new ArrayList<>();
List<HitBase> searchResult = yrunner.runSearch(sentence, 100);
if (MAX_SEARCH_RESULTS<searchResult.size())
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/WordDocBuilderSingleImageSearchCall.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/WordDocBuilderSingleImageSearchCall.java
index 79aa5d1..b0eaa29 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/WordDocBuilderSingleImageSearchCall.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/WordDocBuilderSingleImageSearchCall.java
@@ -16,7 +16,6 @@
*/
package opennlp.tools.similarity.apps.solr;
-
import java.io.File;
import java.util.ArrayList;
import java.util.List;
@@ -25,13 +24,15 @@
import net.billylieurance.azuresearch.AzureSearchResultSet;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
+import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import opennlp.tools.similarity.apps.ContentGeneratorSupport;
import opennlp.tools.similarity.apps.Fragment;
import opennlp.tools.similarity.apps.HitBase;
public class WordDocBuilderSingleImageSearchCall extends WordDocBuilder{
-
+
+ @Override
public String buildWordDoc(List<HitBase> content, String title){
String outputDocFilename = absPath+"/written/"+ title.replace(' ','_').replace('\"', ' ').trim()+ ".docx";
@@ -41,20 +42,20 @@
int count=0;
try {
wordMLPackage = WordprocessingMLPackage.createPackage();
- wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Title", title.toUpperCase());
+ MainDocumentPart mdp = wordMLPackage.getMainDocumentPart();
+ mdp.addStyledParagraphOfText("Title", title.toUpperCase());
for(HitBase para: content){
if (para.getFragments()==null || para.getFragments().size()<1) // no found content in this hit
continue;
try {
if (!para.getTitle().endsWith("..") /*|| StringUtils.isAlphanumeric(para.getTitle())*/){
String sectTitle = ContentGeneratorSupport.getPortionOfTitleWithoutDelimiters(para.getTitle());
- wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle",
- sectTitle);
+ mdp.addStyledParagraphOfText("Subtitle", sectTitle);
}
String paraText = para.getFragments().toString().replace("[", "").replace("]", "").replace(" | ", "")
.replace(".,", ".").replace(".\"", "\"").replace(". .", ".")
.replace(",.", ".");
- wordMLPackage.getMainDocumentPart().addParagraphOfText(paraText);
+ mdp.addParagraphOfText(paraText);
try {
addImageByImageURLToPackage(count, wordMLPackage, imageURLs);
@@ -67,33 +68,22 @@
count++;
}
// now add URLs
- wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle", "REFERENCES");
+ mdp.addStyledParagraphOfText("Subtitle", "REFERENCES");
for(HitBase para: content){
if (para.getFragments()==null || para.getFragments().size()<1) // no found content in this hit
continue;
try {
- wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle",
- para.getTitle());
+ mdp.addStyledParagraphOfText("Subtitle", para.getTitle());
String paraText = para.getUrl();
- wordMLPackage.getMainDocumentPart().addParagraphOfText(paraText);
-
-
+ mdp.addParagraphOfText(paraText);
+
} catch (Exception e) {
e.printStackTrace();
}
}
-
wordMLPackage.save(new File(outputDocFilename));
System.out.println("Finished creating docx ="+outputDocFilename);
- //TODO pdf export
- /*
- FOSettings foSettings = Docx4J.createFOSettings();
- foSettings.setWmlPackage(wordMLPackage);
- OutputStream os = new java.io.FileOutputStream(outputDocFilename.replace(".docx", ".pdf"));
- Docx4J.toFO(foSettings, os, Docx4J.FLAG_NONE);
- System.out.println("Finished creating docx's PDF ="+outputDocFilename);
- */
} catch (Exception e) {
e.printStackTrace();
@@ -102,9 +92,8 @@
return outputDocFilename;
}
- protected void addImageByImageURLToPackage(int count,
- WordprocessingMLPackage wordMLPackage,
- List<String> imageURLs) {
+ protected void addImageByImageURLToPackage(int count, WordprocessingMLPackage wordMLPackage,
+ List<String> imageURLs) {
if (count>imageURLs.size()-1)
return;
@@ -112,7 +101,7 @@
String destinationFile = url.replace("http://", "").replace("/", "_");
saveImageFromTheWeb(url, absPath+IMG_REL_PATH+destinationFile);
File file = new File(absPath+IMG_REL_PATH+destinationFile);
- try {
+ try {
byte[] bytes = convertImageToByteArray(file);
addImageToPackage(wordMLPackage, bytes);
} catch (Exception e) {
@@ -130,20 +119,19 @@
return imageURLs;
}
-
- public static void main(String[] args){
- WordDocBuilderSingleImageSearchCall b = new WordDocBuilderSingleImageSearchCall();
- List<HitBase> content = new ArrayList<>();
- for(int i = 0; i<10; i++){
- HitBase h = new HitBase();
- h.setTitle("albert einstein "+i);
- List<Fragment> frs = new ArrayList<>();
- frs.add(new Fragment(" content "+i, 0));
- h.setFragments(frs);
- content.add(h);
- }
-
- b.buildWordDoc(content, "albert einstein");
- }
+ public static void main(String[] args){
+ WordDocBuilderSingleImageSearchCall b = new WordDocBuilderSingleImageSearchCall();
+ List<HitBase> content = new ArrayList<>();
+ for(int i = 0; i<10; i++){
+ HitBase h = new HitBase();
+ h.setTitle("albert einstein "+i);
+ List<Fragment> frs = new ArrayList<>();
+ frs.add(new Fragment(" content "+i, 0));
+ h.setFragments(frs);
+ content.add(h);
+ }
+
+ b.buildWordDoc(content, "albert einstein");
+ }
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
index 1be923e..fa205d7 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
@@ -19,7 +19,6 @@
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -55,8 +54,7 @@
*/
public int getTaxoScore(String query, String snapshot) {
- lemma_ExtendedAssocWords = (HashMap<String, List<List<String>>>) taxo
- .getLemma_ExtendedAssocWords();
+ lemma_ExtendedAssocWords = taxo.getLemma_ExtendedAssocWords();
query = query.toLowerCase();
snapshot = snapshot.toLowerCase();
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
index 2f53a7d..e780330 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
@@ -27,7 +27,6 @@
import opennlp.tools.similarity.apps.utils.StringCleaner;
import opennlp.tools.stemmer.PStemmer;
import opennlp.tools.textsimilarity.ParseTreeChunk;
-import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
import opennlp.tools.textsimilarity.SentencePairMatchResult;
import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
@@ -39,11 +38,9 @@
* derived list output map 2) for such manual list of words -> derived list of
* words
*/
-
public class TaxonomyExtenderViaMebMining extends BingQueryRunner {
- private final ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
- ParserChunker2MatcherProcessor sm;
+ private ParserChunker2MatcherProcessor sm;
private Map<String, List<List<String>>> lemma_ExtendedAssocWords = new HashMap<>();
private final Map<List<String>, List<List<String>>> assocWords_ExtendedAssocWords = new HashMap<>();
@@ -145,19 +142,17 @@
List<HitBase> resultList = runSearch(query, numbOfHits);
for (int i = 0; i < resultList.size(); i++) {
- {
- for (int j = i + 1; j < resultList.size(); j++) {
- HitBase h1 = resultList.get(i);
- HitBase h2 = resultList.get(j);
- String snapshot1 = StringCleaner.processSnapshotForMatching(h1
- .getTitle() + " . " + h1.getAbstractText());
- String snapshot2 = StringCleaner.processSnapshotForMatching(h2
- .getTitle() + " . " + h2.getAbstractText());
- SentencePairMatchResult matchRes = sm.assessRelevance(snapshot1,
- snapshot2);
- List<List<ParseTreeChunk>> matchResult = matchRes.getMatchResult();
- genResult.addAll(matchResult);
- }
+ for (int j = i + 1; j < resultList.size(); j++) {
+ HitBase h1 = resultList.get(i);
+ HitBase h2 = resultList.get(j);
+ String snapshot1 = StringCleaner.processSnapshotForMatching(h1
+ .getTitle() + " . " + h1.getAbstractText());
+ String snapshot2 = StringCleaner.processSnapshotForMatching(h2
+ .getTitle() + " . " + h2.getAbstractText());
+ SentencePairMatchResult matchRes = sm.assessRelevance(snapshot1,
+ snapshot2);
+ List<List<ParseTreeChunk>> matchResult = matchRes.getMatchResult();
+ genResult.addAll(matchResult);
}
}
@@ -175,9 +170,7 @@
public static void main(String[] args) {
TaxonomyExtenderViaMebMining self = new TaxonomyExtenderViaMebMining();
- self.extendTaxonomy("src/test/resources/taxonomies/irs_dom.ari", "tax",
- "en");
-
+ self.extendTaxonomy("src/test/resources/taxonomies/irs_dom.ari", "tax", "en");
}
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java
deleted file mode 100644
index 21bdafb..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java
+++ /dev/null
@@ -1,373 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.similarity.apps.utils;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.EOFException;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.logging.Logger;
-
-/**
- * This class responsible to save data to files as well as read out! It is
- * capable to handle text and binary files.
- */
-public class FileHandler {
-
- private static final Logger LOG = Logger
- .getLogger("opennlp.tools.similarity.apps.utils.FileHandler");
-
- public void writeToTextFile(String data, String filepath, boolean append)
- throws IOException {
- try {
- BufferedWriter out = new BufferedWriter(new FileWriter(filepath, append));
- out.write(data + "\n");
- out.close();
- } catch (IOException e) {
- LOG.severe(e.toString());
- e.printStackTrace();
- }
- }
-
- /**
- * Writes data from an arrayList<String> to a text-file where each line of the
- * text represented by an element in the list.
- *
- * @param list
- * @param filePath
- * @param append
- * @throws Exception
- */
- public void writeToTextFile(ArrayList<String> list, String filePath, boolean append) throws Exception {
- FileWriter outFile;
- Iterator<String> it = list.iterator();
- if (!append) {
- outFile = new FileWriter(filePath);
- PrintWriter out = new PrintWriter(outFile);
- while (it.hasNext()) {
- out.println(it.next());
- }
- outFile.close();
- } else {
- int tmp = 0;
- while (it.hasNext()) {
- if (tmp == 0) {
- appendtofile("\n" + it.next(), filePath);
- } else {
- appendtofile(it.next(), filePath);
- }
- tmp++;
- }
- }
- }
-
- public void writeObjectToFile(Object obj, String filepath, boolean append) {
- if (!isFileOrDirectoryExists(getDirPathfromFullPath(filepath))) {
- createFolder(getDirPathfromFullPath(filepath));
- }
- ObjectOutputStream outputStream;
- try {
- outputStream = new ObjectOutputStream(new FileOutputStream(filepath));
- outputStream.writeObject(obj);
- } catch (IOException e) {
- LOG.severe(e.toString());
- }
- }
-
- public Object readObjectfromFile(String filePath) {
- ObjectInputStream inputStream = null;
- try {
- // Construct the ObjectInputStream object
- inputStream = new ObjectInputStream(new FileInputStream(filePath));
- Object obj;
- while ((obj = inputStream.readObject()) != null) {
- return obj;
- }
- } catch (EOFException ex) { // This exception will be caught when EOF is
- // reached
- LOG.severe("End of file reached.\n" + ex);
- } catch (ClassNotFoundException | IOException ex) {
- LOG.severe(ex.toString());
- } finally {
- // Close the ObjectInputStream
- try {
- if (inputStream != null) {
- inputStream.close();
- }
- } catch (IOException ex) {
- LOG.severe(ex.toString());
- }
- }
- return null;
- }
-
- /**
- * Creates a byte array from any object.
- * <p>
- * I wanted to use it when I write out object to files! (This is not in use
- * right now, I may move it into other class)
- *
- * @param obj
- * @return
- * @throws java.io.IOException
- */
- public byte[] getBytes(Object obj) throws java.io.IOException {
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- ObjectOutputStream oos = new ObjectOutputStream(bos);
- oos.writeObject(obj);
- oos.flush();
- oos.close();
- bos.close();
- return bos.toByteArray();
- }
-
- /**
- * Fetches all content from a text file, and return it as a String.
- *
- * @return
- */
- public String readFromTextFile(String filePath) {
- StringBuilder contents = new StringBuilder();
- // ...checks on aFile are edited
- File aFile = new File(filePath);
-
- try {
- // use buffering, reading one line at a time
- // FileReader always assumes default encoding is OK!
- // TODO be sure that the default encoding is OK!!!!! Otherwise
- // change it
-
- try (BufferedReader input = new BufferedReader(new FileReader(aFile))) {
- String line; // not declared within while loop
- /*
- * readLine is a bit quirky : it returns the content of a line MINUS the
- * newline. it returns null only for the END of the stream. it returns
- * an empty String if two newlines appear in a row.
- */
- while ((line = input.readLine()) != null) {
- contents.append(line);
- contents.append(System.getProperty("line.separator"));
- }
- }
- } catch (IOException ex) {
- LOG.severe("fileName: " + filePath +"\n " + ex);
- }
- return contents.toString();
- }
-
- /**
- * Reads text file line-wise each line will be an element in the resulting
- * list
- *
- * @param filePath
- * @return
- */
- public List<String> readLinesFromTextFile(String filePath) {
- List<String> lines = new ArrayList<>();
- // ...checks on aFile are edited
- File aFile = new File(filePath);
- try {
- // use buffering, reading one line at a time
- // FileReader always assumes default encoding is OK!
- // TODO be sure that the default encoding is OK!!!!! Otherwise
- // change it
-
- BufferedReader input = new BufferedReader(new FileReader(aFile));
- try {
- String line; // not declared within while loop
- /*
- * readLine is a bit quirky : it returns the content of a line MINUS the
- * newline. it returns null only for the END of the stream. it returns
- * an empty String if two newlines appear in a row.
- */
- while ((line = input.readLine()) != null) {
- lines.add(line);
- }
- } finally {
- input.close();
- }
- } catch (IOException ex) {
- LOG.severe(ex.toString());
- }
- return lines;
- }
-
- private void appendtofile(String data, String filePath) {
- try (BufferedWriter out = new BufferedWriter(new FileWriter(filePath, true))) {
- out.write(data + "\n");
- } catch (IOException e) {
- }
- }
-
- public void createFolder(String path) {
- if (!isFileOrDirectoryExists(path)) {
- File file = new File(path);
- try {
- file.mkdirs();
- } catch (Exception e) {
- LOG.severe("Directory already exists or the file-system is read only");
- }
- }
- }
-
- public boolean isFileOrDirectoryExists(String path) {
- File file = new File(path);
- return file.exists();
- }
-
- /**
- * Separates the directory-path from a full file-path
- *
- * @param filePath
- * @return
- */
- private String getDirPathfromFullPath(String filePath) {
- String dirPath = "";
- if (filePath != null) {
- if (filePath.contains("\\"))
- dirPath = filePath.substring(0, filePath.lastIndexOf("\\"));
- }
- return dirPath;
- }
-
- /**
- * Returns the file-names of the files in a folder (not paths only names) (Not
- * recursive)
- *
- * @param dirPath
- * @return
- */
- public ArrayList<String> getFileNamesInFolder(String dirPath) {
- ArrayList<String> fileNames = new ArrayList<>();
-
- File folder = new File(dirPath);
- File[] listOfFiles = folder.listFiles();
-
- for (File listOfFile : listOfFiles) {
- if (listOfFile.isFile()) {
- fileNames.add(listOfFile.getName());
- } else if (listOfFile.isDirectory()) {
- // TODO if I want to use it recursive I should handle this case
- }
- }
- return fileNames;
- }
-
- public void deleteAllfilesinDir(String dirName) {
- ArrayList<String> fileNameList = getFileNamesInFolder(dirName);
- if (fileNameList != null) {
- for (String s : fileNameList) {
- try {
- deleteFile(dirName + s);
- } catch (IllegalArgumentException e) {
- LOG.severe("No way to delete file: " + dirName + s + "\n" +
- e);
- }
- }
- }
- }
-
- public void deleteFile(String filePath) throws IllegalArgumentException {
- // A File object to represent the filename
- File f = new File(filePath);
- // Make sure the file or directory exists and isn't write protected
- if (!f.exists())
- throw new IllegalArgumentException("Delete: no such file or directory: "
- + filePath);
-
- if (!f.canWrite())
- throw new IllegalArgumentException("Delete: write protected: " + filePath);
- // If it is a directory, make sure it is empty
- if (f.isDirectory()) {
- String[] files = f.list();
- if (files.length > 0)
- throw new IllegalArgumentException("Delete: directory not empty: "
- + filePath);
- }
- // Attempt to delete it
- boolean success = f.delete();
- if (!success)
- throw new IllegalArgumentException("Delete: deletion failed");
- }
-
- public boolean deleteDirectory(File path) {
- if (path.exists()) {
- File[] files = path.listFiles();
- for (File file : files) {
- if (file.isDirectory()) {
- deleteDirectory(file);
- } else {
- file.delete();
- }
- }
- }
- return (path.delete());
- }
-
- /**
- * Returns the absolute-file-paths of the files in a directory (not recursive)
- *
- * @param dirPath
- * @return
- */
- public ArrayList<String> getFilePathsInFolder(String dirPath) {
- ArrayList<String> filePaths = new ArrayList<>();
-
- File folder = new File(dirPath);
- File[] listOfFiles = folder.listFiles();
- if (listOfFiles == null)
- return null;
- for (File listOfFile : listOfFiles) {
- if (listOfFile.isFile()) {
- filePaths.add(listOfFile.getAbsolutePath());
- } else if (listOfFile.isDirectory()) {
- // TODO if I want to use it recursive I should handle this case
- }
- }
- return filePaths;
- }
-
- /**
- * Returns the number of individual files in a directory (Not recursive)
- *
- * @param dirPath
- * @return
- */
- public int getFileNumInFolder(String dirPath) {
- int num;
- try {
- num = getFileNamesInFolder(dirPath).size();
- } catch (Exception e) {
- num = 0;
- }
- return num;
- }
-
-}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
index bae6357..4fd8a17 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
@@ -17,29 +17,19 @@
package opennlp.tools.similarity.apps.utils;
-import java.awt.Graphics2D;
-import java.awt.geom.AffineTransform;
-import java.awt.image.BufferedImage;
-import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
-import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import javax.imageio.ImageIO;
-
import org.apache.commons.lang.StringUtils;
public class Utils {
- private static final Logger LOG = Logger
- .getLogger("opennlp.tools.similarity.apps.utils.Utils");
-
protected static final ArrayList<String[]> CHARACTER_MAPPINGS = new ArrayList<>();
static {
@@ -237,30 +227,6 @@
}
}
- public static boolean createResizedCopy(String originalImage,
- String newImage, int scaledWidth, int scaledHeight) {
- boolean retVal = true;
- try {
- File o = new File(originalImage);
- BufferedImage bsrc = ImageIO.read(o);
- BufferedImage bdest = new BufferedImage(scaledWidth, scaledHeight,
- BufferedImage.TYPE_INT_RGB);
-
- Graphics2D g = bdest.createGraphics();
- AffineTransform at = AffineTransform.getScaleInstance(
- (double) scaledWidth / bsrc.getWidth(),
- (double) scaledHeight / bsrc.getHeight());
- g.drawRenderedImage(bsrc, at);
- ImageIO.write(bdest, "jpeg", new File(newImage));
-
- } catch (Exception e) {
- retVal = false;
- LOG.severe("Failed creating thumbnail for image: " + originalImage + e);
- }
-
- return retVal;
- }
-
private static int minimum(int a, int b, int c) {
int mi;
@@ -676,7 +642,7 @@
public static boolean isLatinWord(String word) {
for (int i = 0; i < word.length(); i++) {
- int asciiCode = (int) word.charAt(i);
+ int asciiCode = word.charAt(i);
if (asciiCode > 128)
return false;
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java b/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
index 98f32bd..0a4554f 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
@@ -43,13 +43,11 @@
import opennlp.tools.util.ParagraphStream;
import opennlp.tools.util.PlainTextByLineStream;
-/*
- * Command line tool for disambiguator supports MFS for now
- *
+/**
+ * Command line tool for disambiguator supports MFS for now.
*/
public class DisambiguatorTool extends CmdLineTool {
- // TODO CmdLineTool should be an interface not abstract class
@Override
public String getName() {
return "Disambiguator";
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
index 7cc7015..11d8f9e 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
@@ -21,6 +21,7 @@
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
+import java.util.regex.Pattern;
public class IMSWSDContextGenerator implements WSDContextGenerator {
@@ -48,13 +49,14 @@
// TODO consider the windowSize
List<String> contextWords = new ArrayList<>();
+ final Pattern pattern = Pattern.compile("[^a-z_]");
+
for (int i = 0; i < toks.length; i++) {
if (lemmas != null) {
- if (!WSDHelper.STOP_WORDS.contains(toks[i].toLowerCase()) && (index
- != i)) {
+ if (!WSDHelper.STOP_WORDS.contains(toks[i].toLowerCase()) && (index != i)) {
- String lemma = lemmas[i].toLowerCase().replaceAll("[^a-z_]", "")
- .trim();
+ String lemma = lemmas[i].toLowerCase();
+ lemma = pattern.matcher(lemma).replaceAll("").trim();
if (lemma.length() > 1) {
contextWords.add(lemma);
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
index 8c52c9d..6e13523 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
@@ -24,6 +24,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import java.util.regex.Pattern;
import net.sf.extjwnl.data.Synset;
@@ -38,14 +39,16 @@
// TODO consider windowSize
ArrayList<String> contextClusters = new ArrayList<>();
+ final Pattern pattern = Pattern.compile("[^a-z_]");
+
for (int i = 0; i < toks.length; i++) {
if (lemmas != null) {
if (!WSDHelper.STOP_WORDS.contains(toks[i].toLowerCase()) && (index
!= i)) {
- String lemma = lemmas[i].toLowerCase().replaceAll("[^a-z_]", "")
- .trim();
+ String lemma = lemmas[i].toLowerCase();
+ lemma = pattern.matcher(lemma).replaceAll("").trim();
WordPOS word = new WordPOS(lemma, tags[i]);
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
index e65bccb..446b46c 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
@@ -98,8 +98,8 @@
this.trainingDataDirectory = trainingDataDirectory;
}
- @Override public boolean areValid() {
- // TODO recheck this pattern
+ @Override
+ public boolean areValid() {
return true;
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
index a51b656..90afbbf 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
@@ -126,7 +126,8 @@
return true;
}
- @Override protected void validateArtifactMap() throws InvalidFormatException {
+ @Override
+ protected void validateArtifactMap() throws InvalidFormatException {
super.validateArtifactMap();
if (!(artifactMap.get(WSD_MODEL_ENTRY_NAME) instanceof AbstractModel)) {
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
index d8667d2..fc060f3 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
@@ -61,8 +61,7 @@
} catch (InvalidFormatException e) {
if (LOG.isLoggable(Level.WARNING)) {
- LOG
- .warning("Error during parsing, ignoring sentence: " + sentence);
+ LOG.warning("Error during parsing, ignoring sentence: " + sentence);
}
sample = null; // new WSDSample(new String[]{}, new String[]{},0);
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
index b222f52..f75d9b7 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
@@ -50,7 +50,8 @@
}
}
- @Override public void validateArtifactMap() throws InvalidFormatException {
+ @Override
+ public void validateArtifactMap() throws InvalidFormatException {
// no additional artifacts
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
index c8aa549..b70bd42 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
@@ -115,7 +115,8 @@
return surroundingWordsModel;
}
- @Override public String disambiguate(WSDSample sample) {
+ @Override
+ public String disambiguate(WSDSample sample) {
if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
String wordTag = sample.getTargetWordTag();
@@ -202,6 +203,7 @@
* @param index : the index of the word to disambiguate
* @return an array of the senses of the word to disambiguate
*/
+ @Override
public String disambiguate(String[] tokenizedContext, String[] tokenTags,
String[] lemmas, int index) {
return disambiguate(
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
index 7ada773..5a2ff78 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
@@ -36,7 +36,6 @@
private List<String> stems;
private final POS pos;
private String posTag;
- public boolean isTarget = false;
public WordPOS(String word, String tag) throws IllegalArgumentException {
if (word == null || tag == null) {
@@ -82,8 +81,7 @@
try {
indexWord = WSDHelper.getDictionary().lookupIndexWord(pos, word);
if (indexWord == null) {
- WSDHelper
- .print("NULL synset probably a POS tagger mistake ! :: [POS] : "
+ WSDHelper.print("NULL synset probably a POS tagger mistake ! :: [POS] : "
+ pos.getLabel() + " [word] : " + word);
return null;
}
diff --git a/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java b/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
index 76e5c8a..1f5b2d2 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
@@ -29,21 +29,21 @@
public static Path writeModelToTmpDir(InputStream modelIn) throws IOException {
Path tmpDir = Files.createTempDirectory("opennlp2");
- ZipInputStream zis = new ZipInputStream(modelIn);
- ZipEntry zipEntry = zis.getNextEntry();
- while(zipEntry != null){
- Path newFile = tmpDir.resolve(zipEntry.getName());
+ try (ZipInputStream zis = new ZipInputStream(modelIn)) {
+ ZipEntry zipEntry = zis.getNextEntry();
+ while(zipEntry != null){
+ Path newFile = tmpDir.resolve(zipEntry.getName());
- Files.createDirectories(newFile.getParent());
- Files.copy(zis, newFile);
+ Files.createDirectories(newFile.getParent());
+ Files.copy(zis, newFile);
- // TODO: How to delete the tmp directory after we are done loading from it ?!
- newFile.toFile().deleteOnExit();
+ // TODO: How to delete the tmp directory after we are done loading from it ?!
+ newFile.toFile().deleteOnExit();
- zipEntry = zis.getNextEntry();
+ zipEntry = zis.getNextEntry();
+ }
+ zis.closeEntry();
}
- zis.closeEntry();
- zis.close();
return tmpDir;
}
diff --git a/tf-ner-poc/src/test/java/org/apache/opennlp/namefinder/PredictTest.java b/tf-ner-poc/src/test/java/org/apache/opennlp/namefinder/PredictTest.java
index 4c7b906..4501630 100644
--- a/tf-ner-poc/src/test/java/org/apache/opennlp/namefinder/PredictTest.java
+++ b/tf-ner-poc/src/test/java/org/apache/opennlp/namefinder/PredictTest.java
@@ -19,20 +19,24 @@
package org.apache.opennlp.namefinder;
+import java.io.IOException;
+import java.nio.file.Path;
+
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import opennlp.tools.util.Span;
-import java.io.IOException;
-import java.nio.file.Path;
-
class PredictTest {
+ // Note: As of Feb 2023, this test won't work on all platforms and, for instance, fails with
+ // "Cannot find TensorFlow native library for OS: darwin, architecture: aarch64"
+ // That's why it is disabled via the architecture system property.
+ // @DisabledIfSystemProperty(named = "os.arch", matches = "aarch64")
@Test
- @Disabled // TODO This test is not platform neutral and, for instance, fails with
- // "Cannot find TensorFlow native library for OS: darwin, architecture: aarch64"
- // We need JUnit 5 in the sandbox to circumvent this, so it can be run in supported environments
+ @Disabled
+ // TODO This test won't work as the required TF model is missing and needs to be re-trained.
+ // Further details, see: https://github.com/apache/opennlp-sandbox/pull/89
void testFindTokens() throws IOException {
// can be changed to File or InputStream