conducts code cleanup of some sandbox classes (#109)
removes unnecessary duplicates (repeated conditions and unused variables)
modernizes some code style towards Java 8+
adds missing break statements so loops terminate as soon as a match is found
simplifies some methods
fixes problematic Javadoc
removes unused imports
diff --git a/caseditor-corpus-server-plugin/src/main/java/org/apache/opennlp/corpus_server/caseditor/CorpusExplorerView.java b/caseditor-corpus-server-plugin/src/main/java/org/apache/opennlp/corpus_server/caseditor/CorpusExplorerView.java
index 352d115..dbb193a 100644
--- a/caseditor-corpus-server-plugin/src/main/java/org/apache/opennlp/corpus_server/caseditor/CorpusExplorerView.java
+++ b/caseditor-corpus-server-plugin/src/main/java/org/apache/opennlp/corpus_server/caseditor/CorpusExplorerView.java
@@ -24,16 +24,12 @@
import org.eclipse.jface.layout.GridDataFactory;
import org.eclipse.jface.preference.IPreferenceStore;
import org.eclipse.jface.viewers.ILabelProviderListener;
-import org.eclipse.jface.viewers.IOpenListener;
import org.eclipse.jface.viewers.ITableLabelProvider;
-import org.eclipse.jface.viewers.OpenEvent;
import org.eclipse.jface.viewers.StructuredSelection;
import org.eclipse.jface.viewers.TableViewer;
import org.eclipse.swt.SWT;
import org.eclipse.swt.events.KeyEvent;
import org.eclipse.swt.events.KeyListener;
-import org.eclipse.swt.events.ModifyEvent;
-import org.eclipse.swt.events.ModifyListener;
import org.eclipse.swt.events.SelectionEvent;
import org.eclipse.swt.events.SelectionListener;
import org.eclipse.swt.graphics.Image;
diff --git a/caseditor-corpus-server-plugin/src/main/java/org/apache/opennlp/corpus_server/caseditor/TaskQueueView.java b/caseditor-corpus-server-plugin/src/main/java/org/apache/opennlp/corpus_server/caseditor/TaskQueueView.java
index 53c8d87..418a9a8 100644
--- a/caseditor-corpus-server-plugin/src/main/java/org/apache/opennlp/corpus_server/caseditor/TaskQueueView.java
+++ b/caseditor-corpus-server-plugin/src/main/java/org/apache/opennlp/corpus_server/caseditor/TaskQueueView.java
@@ -28,9 +28,7 @@
import org.eclipse.jface.layout.GridDataFactory;
import org.eclipse.jface.viewers.ILabelProviderListener;
-import org.eclipse.jface.viewers.IOpenListener;
import org.eclipse.jface.viewers.ITableLabelProvider;
-import org.eclipse.jface.viewers.OpenEvent;
import org.eclipse.jface.viewers.StructuredSelection;
import org.eclipse.jface.viewers.TableViewer;
import org.eclipse.swt.SWT;
diff --git a/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/TypeInputDialog.java b/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/TypeInputDialog.java
index 6c37546..a903da4 100644
--- a/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/TypeInputDialog.java
+++ b/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/TypeInputDialog.java
@@ -18,7 +18,6 @@
package org.apache.opennlp.caseditor;
import org.apache.uima.cas.TypeSystem;
-import org.eclipse.jface.dialogs.IInputValidator;
import org.eclipse.jface.dialogs.InputDialog;
import org.eclipse.swt.widgets.Shell;
diff --git a/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/ConfirmedNameDetectionFieldEditor.java b/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/ConfirmedNameDetectionFieldEditor.java
index e70ec1a..8eb4346 100644
--- a/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/ConfirmedNameDetectionFieldEditor.java
+++ b/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/ConfirmedNameDetectionFieldEditor.java
@@ -23,9 +23,7 @@
import org.eclipse.swt.layout.GridLayout;
import org.eclipse.swt.widgets.Button;
import org.eclipse.swt.widgets.Composite;
-import org.eclipse.swt.widgets.Event;
import org.eclipse.swt.widgets.Group;
-import org.eclipse.swt.widgets.Listener;
import static org.apache.opennlp.caseditor.OpenNLPPreferenceConstants.*;
diff --git a/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java b/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java
index 1e86482..312e604 100644
--- a/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java
+++ b/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderViewPage.java
@@ -32,8 +32,6 @@
import org.eclipse.jface.action.IToolBarManager;
import org.eclipse.jface.preference.IPreferenceStore;
import org.eclipse.jface.viewers.ISelection;
-import org.eclipse.jface.viewers.ISelectionChangedListener;
-import org.eclipse.jface.viewers.SelectionChangedEvent;
import org.eclipse.jface.viewers.StructuredSelection;
import org.eclipse.jface.viewers.TableViewer;
import org.eclipse.jface.viewers.TableViewerColumn;
diff --git a/corpus-server/corpus-server-impl/src/main/java/org/apache/opennlp/corpus_server/impl/LuceneSearchService.java b/corpus-server/corpus-server-impl/src/main/java/org/apache/opennlp/corpus_server/impl/LuceneSearchService.java
index d4b3672..d8fceca 100644
--- a/corpus-server/corpus-server-impl/src/main/java/org/apache/opennlp/corpus_server/impl/LuceneSearchService.java
+++ b/corpus-server/corpus-server-impl/src/main/java/org/apache/opennlp/corpus_server/impl/LuceneSearchService.java
@@ -105,37 +105,18 @@
File mappingTmpFile = File.createTempFile("lucas-mapping", corpusId + ".xml");
mappingTmpFile.deleteOnExit();
- InputStream mappingFileIn = new ByteArrayInputStream(corpusStore.getIndexMapping());
- OutputStream mappingTmpOut = null;
-
- try {
- mappingTmpOut = new FileOutputStream(mappingTmpFile);
-
+ try (InputStream mappingFileIn = new ByteArrayInputStream(corpusStore.getIndexMapping());
+ OutputStream mappingTmpOut = new FileOutputStream(mappingTmpFile)) {
+
byte[] buffer = new byte[1024];
int len;
while ((len = mappingFileIn.read(buffer)) > 0) {
mappingTmpOut.write(buffer, 0, len);
}
- }
- catch (IOException e) {
+ } catch (IOException e) {
// TODO: Or just ignore it ?! and do not create the indexer for this corpus?!
throw e;
}
- finally {
- if (mappingFileIn != null) {
- try {
- mappingFileIn.close();
- }
- catch (IOException e) {}
- }
-
- if (mappingTmpOut != null) {
- try {
- mappingTmpOut.close();
- }
- catch (IOException e) {}
- }
- }
specifier.getAnalysisEngineMetaData().
getConfigurationParameterSettings().setParameterValue("mappingFile",
diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java
index d29b2e5..6700cda 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java
@@ -29,7 +29,7 @@
@Override
public double calculateProbability(Collection<T[]> vocabulary, T[] sentence) {
- return vocabulary.isEmpty() ? 0 : Collections.frequency(vocabulary, sentence) / vocabulary.size();
+ return vocabulary.isEmpty() ? 0 : (double) Collections.frequency(vocabulary, sentence) / vocabulary.size();
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
index 2cd657a..31fe435 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
@@ -547,10 +547,7 @@
*/
public static boolean definiteArticle(String tok, String tag) {
tok = tok.toLowerCase();
- if (tok.equals("the") || tok.equals("these") || tok.equals("these") || tag.equals("PRP$")) {
- return (true);
- }
- return (false);
+ return tok.equals("the") || tok.equals("these") || tok.equals("those") || tag.equals("PRP$");
}
public static String getNumberCompatibilityFeature(MentionContext ec, DiscourseEntity de) {
diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java
index b83c522..f6206b6 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java
@@ -18,7 +18,6 @@
package opennlp.tools.formats.muc;
import java.io.File;
-import java.io.FileFilter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java b/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java
index dd2c39c..f5d0c07 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java
@@ -52,6 +52,8 @@
*/
public class NeuralDocCatTrainer {
+ // Note: Arguments can't be declared 'final'
+ // See: https://github.com/apache/opennlp-sandbox/pull/109#discussion_r1262904650
public static class Args {
@Option(name = "-batchSize", usage = "Number of examples in minibatch")
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java
index a69ba08..7ec4c26 100755
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java
@@ -95,7 +95,7 @@
public List<String> getWordsThatShouldBeOnBothSidesEvents()
{
-/*
+ /*
names.addAll(Arrays.asList(new String[] { "woman", "man", "women", "men", "womans", "mans", "womens", "mens",
"boy", "girl", "boys", "girls", "men's", "women's", "woman's", "ice", // for disney
"flight", "intermediate", "advanced", "beginner",
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
index 8b00ff9..013ad90 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
@@ -143,7 +143,7 @@
&& phraseBeingFormed )
break;
else if (ch.getPOSs().get(i).startsWith("DT") || ch.getPOSs().get(i).startsWith("CC"))
- continue;
+ continue;
}
query = new StringBuilder(query.toString().trim());
int len = query.toString().split(" ").length;
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
index 945b96d..a27361a 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
@@ -25,18 +25,16 @@
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
-import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
public class MachineTranslationWrapper {
- private final String translatorURL = "http://mymemory.translated.net/api/get?q=";
+ private static final String TRANSLATOR_URL = "http://mymemory.translated.net/api/get?q=";
public String translate(String sentence, String lang2lang){
if (sentence==null)
return null;
- String request = translatorURL + sentence.replace(' ','+') + "&langpair="+lang2lang;//"en|es";
- JSONArray arr=null, prodArr = null, searchURLviewArr = null;
+ String request = TRANSLATOR_URL + sentence.replace(' ','+') + "&langpair="+lang2lang;//"en|es";
try {
URL urlC = new URL(request);
URLConnection connection = urlC.openConnection();
@@ -53,20 +51,12 @@
JSONObject rootObject = new JSONObject(result.toString());
JSONObject findObject = rootObject.getJSONObject("responseData");
String transl = findObject.getString("translatedText");
- try {
- transl = URLDecoder.decode(transl, StandardCharsets.UTF_8);
- } catch (Exception e) {
-
- }
-
- return transl;
+ return URLDecoder.decode(transl, StandardCharsets.UTF_8);
} catch (IOException | JSONException e) {
-
e.printStackTrace();
return null;
}
-
}
public String rePhrase(String sentence){
@@ -79,16 +69,11 @@
else
return sentence;
}
-
-
-
+
public static void main(String[] args){
MachineTranslationWrapper rePhraser = new MachineTranslationWrapper();
-
System.out.println(rePhraser.translate("I went to the nearest bookstore to buy a book written by my friend and his aunt", "en|ru"));
-
System.out.println(rePhraser.rePhrase("I went to the nearest bookstore to buy a book written by my friend and his aunt"));
-
}
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java
index dcb3ef4..e5590e7 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java
@@ -22,6 +22,7 @@
import opennlp.tools.similarity.apps.utils.Utils;
public class MinedSentenceProcessor {
+
public static String acceptableMinedSentence(String sent) {
// if too many commas => seo text
@@ -54,7 +55,6 @@
return null;
}
- String[] pipes = StringUtils.split(sent, '|');
if (StringUtils.split(sent, '|').length > 2
|| StringUtils.split(sent, '>').length > 2) {
System.out.println("Rejection: too many |s or >s ");
@@ -93,7 +93,6 @@
|| sentTry.contains("[edit")
|| sentTry.contains("edit categories")
|| sentTry.contains("free license")
- || sentTry.contains("permission is granted")
|| sentTry.contains("under the terms")
|| sentTry.contains("rights reserved")
|| sentTry.contains("wikipedia") || sentTry.endsWith("the")
@@ -101,19 +100,19 @@
|| sentTry.contains("recipient of") || sentTry.contains("this message")
|| sentTry.contains("mailing list") || sentTry.contains("purchase order")
|| sentTry.contains("mon-fri") || sentTry.contains("email us") || sentTry.contains("privacy pol") || sentTry.contains("back to top")
- || sentTry.contains("click here") || sentTry.contains("for details") || sentTry.contains("assistance?") || sentTry.contains("chat live")
+ || sentTry.contains("for details") || sentTry.contains("assistance?") || sentTry.contains("chat live")
|| sentTry.contains("free shipping") || sentTry.contains("company info") || sentTry.contains("satisfaction g") || sentTry.contains("contact us")
- ||sentTry.startsWith( "fax") ||sentTry.startsWith( "write") || sentTry.startsWith( "email")|| sentTry.contains("conditions") || sentTry.contains("chat live")
- ||sentTry.startsWith( "we ") || sentTry.contains("the recipient") || sentTry.contains("day return") || sentTry.contains("days return")
+ ||sentTry.startsWith("write") || sentTry.startsWith( "email")|| sentTry.contains("conditions")
+ ||sentTry.startsWith("we ") || sentTry.contains("the recipient") || sentTry.contains("day return") || sentTry.contains("days return")
- ||sentTry.startsWith( "fax") || sentTry.contains("refund it") || sentTry.contains("your money")
- ||sentTry.startsWith( "free") || sentTry.contains("purchase orders")
- ||sentTry.startsWith( "exchange it ") || sentTry.contains("return it") || sentTry.contains("credit card")
+ ||sentTry.startsWith("fax") || sentTry.contains("refund it") || sentTry.contains("your money")
+ ||sentTry.startsWith("free") || sentTry.contains("purchase orders")
+ ||sentTry.startsWith("exchange it ") || sentTry.contains("return it") || sentTry.contains("credit card")
|| sentTry.contains("storeshop") || sentTry.startsWith( "find") || sentTry.startsWith( "shop") || sentTry.startsWith( "unlimited")
|| sentTry.contains("for a limited time") || sentTry.contains("prime members") || sentTry.contains("amazon members") || sentTry.contains("unlimited free")
|| sentTry.contains("shipping") || sentTry.startsWith( "amazon")
-// not a script text
+ // not a script text
|| sentTry.contains("document.body") || sentTry.contains(" var ") || sentTry.contains("search suggestions") ||sentTry.startsWith( "Search")
)
@@ -192,9 +191,8 @@
return pageSentence;
}
-
public static String normalizeForSentenceSplitting(String pageContent) {
- pageContent.replace("Jan.", "January").replace("Feb.", "February")
+ pageContent = pageContent.replace("Jan.", "January").replace("Feb.", "February")
.replace("Mar.", "March").replace("Apr.", "April")
.replace("Jun.", "June").replace("Jul.", "July")
.replace("Aug.", "August").replace("Sep.", "September")
@@ -202,6 +200,5 @@
.replace("Dec.", "December");
return pageContent;
-
}
}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java
index 6a11ad7..1cb64b2 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java
@@ -25,7 +25,7 @@
public class ReviewBuilderRunner {
- private final List<Triple> input = new ArrayList<>();
+ private final List<Triple<String, Integer, String>> input = new ArrayList<>();
public ReviewBuilderRunner(){
@@ -168,14 +168,10 @@
//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/relevanceEngine/src/test/resources");
ReviewBuilderRunner r = new ReviewBuilderRunner();
WebPageReviewExtractor extractor = new WebPageReviewExtractor("C:/workspace/relevanceEngine/src/test/resources");
- for(Triple query_ID : r.input ){
- String query = (String) query_ID.getFirst();
+ for(Triple<String, Integer, String> query_ID : r.input ){
+ String query = query_ID.getFirst();
List<String> res = extractor.formReviewsForAProduct(query);
-
ProfileReaderWriter.writeReportListStr(res, "formedReviewSentences"+ query +".csv");
}
-
-
-
}
}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java
index 0c23938..f8b4a34 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java
@@ -21,134 +21,125 @@
public class ReviewObj {
- long bpid;
- long pid;
- float rating;
- String pros;
- String cons;
- String url;
- String title;
- String review;
- String keywordsName;
- float score;
- String[] origSentences;
- String[] featurePhrases;
+ private long bpid;
+ private long pid;
+ private float rating;
+ private String pros;
+ private String cons;
+ private String url;
+ private String title;
+ private String review;
+ private String keywordsName;
+ private float score;
+ private String[] origSentences;
+ private String[] featurePhrases;
+
+ private List<String> originalizedSentences ; //obtained from sentences;
+ private List<String> sentimentPhrases ; //obtained from sentences;
- List<String> originalizedSentences ; //obtained from sentences;
- List<String> sentimentPhrases ; //obtained from sentences;
-
- public ReviewObj(long bpid, long pid, float rating, String pros,
- String cons, String url, String title, String review,
- float score) {
- super();
- this.bpid = bpid;
- this.pid = pid;
- this.rating = rating;
- this.pros = pros;
- this.cons = cons;
- this.url = url;
- this.title = title;
- this.review = review;
- this.score = score;
- }
-
-
- public List<String> getSentimentPhrases() {
- return sentimentPhrases;
- }
+ public ReviewObj(long bpid, long pid, float rating, String pros,
+ String cons, String url, String title, String review, float score) {
+ this();
+ this.bpid = bpid;
+ this.pid = pid;
+ this.rating = rating;
+ this.pros = pros;
+ this.cons = cons;
+ this.url = url;
+ this.title = title;
+ this.review = review;
+ this.score = score;
+ }
+ public ReviewObj() {
+ }
- public void setSentimentPhrases(List<String> sentimentPhrases) {
- this.sentimentPhrases = sentimentPhrases;
- }
+ public List<String> getSentimentPhrases() {
+ return sentimentPhrases;
+ }
+ public void setSentimentPhrases(List<String> sentimentPhrases) {
+ this.sentimentPhrases = sentimentPhrases;
+ }
- public ReviewObj() {
- }
-
- public String[] getOrigSentences() {
- return origSentences;
- }
- public void setOrigSentences(String[] sentences) {
- this.origSentences = sentences;
- }
- public List<String> getOriginalizedSentences() {
- return originalizedSentences;
- }
+ public String[] getOrigSentences() {
+ return origSentences;
+ }
+ public void setOrigSentences(String[] sentences) {
+ this.origSentences = sentences;
+ }
+ public List<String> getOriginalizedSentences() {
+ return originalizedSentences;
+ }
+ public void setOriginalizedSentences(List<String> originalizedSentences) {
+ this.originalizedSentences = originalizedSentences;
+ }
- public void setOriginalizedSentences(List<String> originalizedSentences) {
- this.originalizedSentences = originalizedSentences;
- }
-
-
- public String[] getFeaturePhrases() {
- return featurePhrases;
- }
- public void setFeaturePhrases(String[] featurePhrases) {
- this.featurePhrases = featurePhrases;
- }
- public long getBpid() {
- return bpid;
- }
- public void setBpid(long bpid) {
- this.bpid = bpid;
- }
- public long getPid() {
- return pid;
- }
- public void setPid(long pid) {
- this.pid = pid;
- }
- public float getRating() {
- return rating;
- }
- public void setRating(float rating) {
- this.rating = rating;
- }
- public String getPros() {
- return pros;
- }
- public void setPros(String pros) {
- this.pros = pros;
- }
- public String getCons() {
- return cons;
- }
- public void setCons(String cons) {
- this.cons = cons;
- }
- public String getUrl() {
- return url;
- }
- public void setUrl(String url) {
- this.url = url;
- }
- public String getTitle() {
- return title;
- }
- public void setTitle(String title) {
- this.title = title;
- }
- public String getReview() {
- return review;
- }
- public void setReview(String review) {
- this.review = review;
- }
- public float getScore() {
- return score;
- }
- public void setScore(float score) {
- this.score = score;
- }
- public String getKeywordsName() {
-
- return this.keywordsName;
- }
- public void setKeywordsName(String kw) {
-
- keywordsName=kw;
- }
-
+ public String[] getFeaturePhrases() {
+ return featurePhrases;
+ }
+ public void setFeaturePhrases(String[] featurePhrases) {
+ this.featurePhrases = featurePhrases;
+ }
+ public long getBpid() {
+ return bpid;
+ }
+ public void setBpid(long bpid) {
+ this.bpid = bpid;
+ }
+ public long getPid() {
+ return pid;
+ }
+ public void setPid(long pid) {
+ this.pid = pid;
+ }
+ public float getRating() {
+ return rating;
+ }
+ public void setRating(float rating) {
+ this.rating = rating;
+ }
+ public String getPros() {
+ return pros;
+ }
+ public void setPros(String pros) {
+ this.pros = pros;
+ }
+ public String getCons() {
+ return cons;
+ }
+ public void setCons(String cons) {
+ this.cons = cons;
+ }
+ public String getUrl() {
+ return url;
+ }
+ public void setUrl(String url) {
+ this.url = url;
+ }
+ public String getTitle() {
+ return title;
+ }
+ public void setTitle(String title) {
+ this.title = title;
+ }
+ public String getReview() {
+ return review;
+ }
+ public void setReview(String review) {
+ this.review = review;
+ }
+ public float getScore() {
+ return score;
+ }
+ public void setScore(float score) {
+ this.score = score;
+ }
+ public String getKeywordsName() {
+ return this.keywordsName;
+ }
+ public void setKeywordsName(String kw) {
+ keywordsName=kw;
+ }
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java
index b2bcad4..8e0a37d 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java
@@ -26,50 +26,37 @@
private Map<String, String> sentKey_value;
private String sentence;
private List<List<ParseTreeChunk>> groupedChunks;
-
-
-
- public Map<String, String> getSentKey_value() {
- return sentKey_value;
- }
-
-
-
- public void setSentKey_value(Map<String, String> sentKey_value) {
- this.sentKey_value = sentKey_value;
- }
-
-
-
- public String getSentence() {
- return sentence;
- }
-
-
-
- public void setSentence(String sentence) {
- this.sentence = sentence;
- }
-
-
-
- public List<List<ParseTreeChunk>> getGroupedChunks() {
- return groupedChunks;
- }
-
-
-
- public void setGroupedChunks(List<List<ParseTreeChunk>> groupedChunks) {
- this.groupedChunks = groupedChunks;
- }
-
-
public SentenceBeingOriginalized(Map<String, String> sentKey_value,
- String sentence, List<List<ParseTreeChunk>> groupedChunks) {
+ String sentence, List<List<ParseTreeChunk>> groupedChunks) {
super();
this.sentKey_value = sentKey_value;
this.sentence = sentence;
this.groupedChunks = groupedChunks;
}
+
+ public Map<String, String> getSentKey_value() {
+ return sentKey_value;
+ }
+
+ public void setSentKey_value(Map<String, String> sentKey_value) {
+ this.sentKey_value = sentKey_value;
+ }
+
+ public String getSentence() {
+ return sentence;
+ }
+
+ public void setSentence(String sentence) {
+ this.sentence = sentence;
+ }
+
+ public List<List<ParseTreeChunk>> getGroupedChunks() {
+ return groupedChunks;
+ }
+
+ public void setGroupedChunks(List<List<ParseTreeChunk>> groupedChunks) {
+ this.groupedChunks = groupedChunks;
+ }
+
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java
index bba4639..7b900df 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java
@@ -63,8 +63,6 @@
this.sents = sents;
}
-
-
private void substituteProsCons(){
for(int i = 0; i< sents.length; i++){
if (sents[i]==null)
@@ -181,8 +179,7 @@
if (vps.size()==1)
line = rePhraser.rePhrase(line);
else {
- if (vps.size()>1)
-
+ if (vps.size()>1) {
for (ParseTreeChunk v: vps){
String verbLemma = v.getLemmas().get(0);
String newVerb = filter.getSynonym(verbLemma);
@@ -190,12 +187,13 @@
&& !newVerb.endsWith("ness") // empirical rule
&& !verbsShouldStayNoSubstition.contains(verbLemma) &&
!verbsShouldStayNoSubstition.contains(newVerb) ){
- line = line.replace(verbLemma+" ", newVerb+" ");
- line = line.replace(" "+verbLemma, " "+newVerb);
+ line = line.replace(verbLemma+" ", newVerb+" ");
+ line = line.replace(" "+verbLemma, " "+newVerb);
System.out.println("Synonym for verb substitution: "+verbLemma + "->"+newVerb);
bVerbRule = true;
}
}
+ }
if (!bVerbRule && vps.size()==2 && Math.random()>0.8) // no other means of originalization worked, so do inverse translation
line = rePhraser.rePhrase(line);
}
@@ -225,7 +223,6 @@
}
}
}
-
return line;
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java
index 078d56c..4448f58 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java
@@ -170,7 +170,7 @@
private List<String> cleanProductFeatures(List<String> productFeaturesList) {
List<String> results = new ArrayList<>();
for (String feature: productFeaturesList) {
- if (feature.startsWith("Unlimited Free") || feature.startsWith("View Larger") || feature.startsWith("View Larger") || feature.indexOf("shipping")>0)
+ if (feature.startsWith("Unlimited Free") || feature.startsWith("View Larger") || feature.indexOf("shipping")>0)
continue;
results.add(feature);
}
@@ -324,7 +324,7 @@
if (count%2==0 && count<features.length)
if (features[count]!=null){
buf.append(features[count]);
- if (!(features[count].endsWith("!") ||features[count].endsWith("?")||features[count].endsWith("?")
+ if (!(features[count].endsWith("!") ||features[count].endsWith("?")
||features[count].endsWith(".\"") ))
buf.append(". ");
}
@@ -364,7 +364,7 @@
if (count%2==0 && count<features.length)
if (features[count]!=null){
bufs[currentRevIndex].append(features[count]);
- if (!(features[count].endsWith("!") ||features[count].endsWith("?")||features[count].endsWith("?")
+ if (!(features[count].endsWith("!") ||features[count].endsWith("?")
||features[count].endsWith(".\"") ))
bufs[currentRevIndex].append(". ");
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java
index 808788f..6e1ebe9 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java
@@ -76,13 +76,15 @@
for(String l: lines){
boolean bAccept = true;
for(String h: HEADERS){
- if (l.startsWith(h)){
+ if (l.startsWith(h)) {
bAccept = false;
+ break;
}
}
for(String h: PROHIBITED_STRINGS){
- if (l.indexOf(h)>0){
+ if (l.indexOf(h) > 0) {
bAccept = false;
+ break;
}
}
if (bAccept)
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java
index 1a2f89e..1a8ce6d 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java
@@ -61,13 +61,15 @@
for(String l: lines){
boolean bAccept = true;
for(String h: EmailNormalizer.HEADERS){
- if (l.startsWith(h)){
+ if (l.startsWith(h)) {
bAccept = false;
+ break;
}
}
for(String h: EmailNormalizer.PROHIBITED_STRINGS){
- if (l.indexOf(h)>0){
+ if (l.indexOf(h) > 0) {
bAccept = false;
+ break;
}
}
if (bAccept)
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/fca/BasicLevelMetrics.java b/opennlp-similarity/src/main/java/opennlp/tools/fca/BasicLevelMetrics.java
index cfb3f2c..668e0ab 100755
--- a/opennlp-similarity/src/main/java/opennlp/tools/fca/BasicLevelMetrics.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/BasicLevelMetrics.java
@@ -103,8 +103,6 @@
return new double[] {simJ, simSMC};
}
-
-
public double avgCohSMC (FormalConcept c){
double sum = 0;
if (c.extent.size() == 1)
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/fca/ConceptLattice.java b/opennlp-similarity/src/main/java/opennlp/tools/fca/ConceptLattice.java
index 2de4345..6d59154 100755
--- a/opennlp-similarity/src/main/java/opennlp/tools/fca/ConceptLattice.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/ConceptLattice.java
@@ -63,7 +63,7 @@
public ConceptLattice(String filename, boolean stats) throws IOException {
FcaReader fr = new FcaReader();
- fr.ReadContextFromCxt(filename);
+ fr.readContextFromCxt(filename);
this.objectCount = fr.getObjectsCount();
this.attributeCount = fr.getAttributesCount();
this.binaryContext = fr.getBinaryContext();
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaReader.java b/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaReader.java
index 2d1caeb..e55aa3a 100755
--- a/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaReader.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaReader.java
@@ -29,13 +29,12 @@
int objectsNumber = 0;
int attributesNumber = 0;
- public void ReadContextFromCxt(String filename) throws IOException{
+ public void readContextFromCxt(String filename) {
obNames = new ArrayList<>();
atNames = new ArrayList<>();
- BufferedReader br = new BufferedReader(new FileReader(filename));
- try {
+ try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
String line;
br.readLine(); //B
br.readLine();
@@ -83,9 +82,8 @@
public static void main(String []args) throws IOException{
-
FcaReader loader = new FcaReader();
- loader.ReadContextFromCxt("C://Users/Tanya/Desktop/�����/1 �������/������������� ��������� � ������� ������/�������/sports.cxt");
+ loader.readContextFromCxt("C://Users/Tanya/Desktop/�����/1 �������/������������� ��������� � ������� ������/�������/sports.cxt");
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaWriter.java b/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaWriter.java
index f43dbb8..94cc048 100755
--- a/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaWriter.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/FcaWriter.java
@@ -25,7 +25,7 @@
public class FcaWriter {
- public void WriteAsCxt(String filename, ConceptLattice cl){
+ public void writeAsCxt(String filename, ConceptLattice cl){
try (Writer writer = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(filename), StandardCharsets.UTF_8))) {
@@ -50,7 +50,7 @@
}
} catch (IOException ex) {
- System.err.println(ex.getMessage());
+ ex.printStackTrace();
}
}
@@ -99,12 +99,10 @@
}
}
-
public static void main(String []args) throws IOException{
ConceptLattice cl = new ConceptLattice("sports.cxt",false);
FcaWriter writer = new FcaWriter();
- writer.WriteAsCxt("res.cxt",cl);
-
+ writer.writeAsCxt("res.cxt",cl);
}
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/fca/Measures.java b/opennlp-similarity/src/main/java/opennlp/tools/fca/Measures.java
index 7700e41..531c4df 100755
--- a/opennlp-similarity/src/main/java/opennlp/tools/fca/Measures.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/fca/Measures.java
@@ -55,7 +55,7 @@
min_delta = cl.attributeCount;
sum = 0;
FormalConcept fc = cl.conceptList.get(i);
- for (Integer j: fc.children) {
+ for (int j: fc.children) {
delta = fc.getExtent().size() - cl.conceptList.get(j).getExtent().size();
if (delta<min_delta)
min_delta = delta;
@@ -104,10 +104,10 @@
public double intentProbability(ArrayList<Integer> intent){
double pB = 1;
- for (Integer integer : intent) {
+ for (int integer : intent) {
pB *= attributeProbability(integer);
}
- return pB;
+ return pB;
}
public void probability(){
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithDeduction.java b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithDeduction.java
index 03e8b1e..d9c8b83 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithDeduction.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithDeduction.java
@@ -179,8 +179,10 @@
for(String posText:posTexts){
boolean multiwordOccurs = true;
for(String keyword: keywords){
- if (!posText.contains(keyword))
+ if (!posText.contains(keyword)) {
multiwordOccurs = false;
+ break;
+ }
break;
}
if (multiwordOccurs)
@@ -191,8 +193,10 @@
for(String negText:negTexts){
boolean multiwordOccurs = true;
for(String keyword: keywords){
- if (!negText.contains(keyword))
+ if (!negText.contains(keyword)) {
multiwordOccurs = false;
+ break;
+ }
break;
}
if (multiwordOccurs)
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
index 0e5811e..1a16adb 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
@@ -60,7 +60,6 @@
sentence = sentence.split(":")[0]+".";
}
-
List<ObjectPhrase> oPhrases = new ArrayList<>();
parser = ParserChunker2MatcherProcessor.getInstance();
List<List<ParseTreeChunk>> lingPhrases =
@@ -79,8 +78,8 @@
List<String> lems = verbChunk.getLemmas();
String declarativeAction = verbChunk.getLemmas().get(0).toLowerCase();
if (declarativeAction.equals("define")){
- if (verbChunk.getLemmas().get(1).toLowerCase().equals("class") ||
- verbChunk.getLemmas().get(2).toLowerCase().equals("class")){
+ if (verbChunk.getLemmas().get(1).equalsIgnoreCase("class") ||
+ verbChunk.getLemmas().get(2).equalsIgnoreCase("class")){
// new class
String className = verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase();
className = className.substring(0, 1).toUpperCase()+className.substring(1, className.length());
@@ -91,12 +90,12 @@
}
String dataType = verbChunk.getLemmas().get(1).toLowerCase();
- if (classBeingDefined && Arrays.asList(DATA_TYPES_LIST).contains(dataType) && verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){
+ if (classBeingDefined && Arrays.asList(DATA_TYPES_LIST).contains(dataType) && verbChunk.getLemmas().get(2).equalsIgnoreCase("attribute")){
op.setOperatorFor(dataType + " "+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase());
classBeingDefined = true;
break;
}
- if (Arrays.asList(DATA_TYPES_LIST).contains(dataType) && verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){
+ if (Arrays.asList(DATA_TYPES_LIST).contains(dataType) && verbChunk.getLemmas().get(2).equalsIgnoreCase("attribute")){
op.setOperatorFor(dataType + " "+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase());
classBeingDefined = true;
break;
@@ -104,7 +103,7 @@
} else if (declarativeAction.equals("create")){
// now substituting array
- if (verbChunk.getLemmas().get(1).toLowerCase().equals("array")){
+ if (verbChunk.getLemmas().get(1).equalsIgnoreCase("array")){
if(lems.contains("class")){
int indClass = lems.indexOf("class");
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MinedSentenceProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MinedSentenceProcessor.java
deleted file mode 100644
index fddef77..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MinedSentenceProcessor.java
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.parse_thicket.apps;
-
-import opennlp.tools.similarity.apps.utils.Utils;
-
-import org.apache.commons.lang.StringUtils;
-
-public class MinedSentenceProcessor {
- public static String acceptableMinedSentence(String sent) {
- // if too many commas => seo text
-
- String[] commas = StringUtils.split(sent, ',');
- String[] spaces = StringUtils.split(sent, ' ');
- if ((float) commas.length / (float) spaces.length > 0.7) {
- System.out.println("Rejection: too many commas");
- return null;
- }
-
- String[] otherDelimiters = StringUtils.split(sent, '/');
- if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
- System.out.println("Rejection: too many delimiters");
- return null;
- }
-
- otherDelimiters = StringUtils.split(sent, '.');
- if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
- System.out.println("Rejection: too many delimiters");
- return null;
- }
- otherDelimiters = StringUtils.split(sent, '!');
- if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
- System.out.println("Rejection: too many delimiters");
- return null;
- }
- otherDelimiters = StringUtils.split(sent, '=');
- if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
- System.out.println("Rejection: too many delimiters");
- return null;
- }
-
- String[] pipes = StringUtils.split(sent, '|');
- if (StringUtils.split(sent, '|').length > 2
- || StringUtils.split(sent, '>').length > 2) {
- System.out.println("Rejection: too many |s or >s ");
- return null;
- }
- String sentTry = sent.toLowerCase();
- // if too many long spaces
- String sentSpaces = sentTry.replace(" ", "");
- if (sentSpaces.length() - sentTry.length() > 10) // too many spaces -
- // suspicious
- return null;
-
- if (sentTry.contains("click here") || sentTry.contains(" wikip")
- || sentTry.contains("copyright")
- || sentTry.contains("operating hours")
- || sentTry.contains("days per week")
- || sentTry.contains("click for") || sentTry.contains("photos")
- || sentTry.contains("find the latest")
- || sentTry.startsWith("subscribe")
- || sentTry.contains("Terms of Service")
- || sentTry.contains("clicking here")
- || sentTry.contains("skip to") || sentTry.contains("sidebar")
- || sentTry.contains("Tags:") || sentTry.startsWith("Posted by")
- || sentTry.contains("available online")
- || sentTry.contains("get online")
- || sentTry.contains("buy online")
- || sentTry.contains("not valid") || sentTry.contains("discount")
- || sentTry.contains("official site")
- || sentTry.contains("this video")
- || sentTry.contains("this book")
- || sentTry.contains("this product")
- || sentTry.contains("paperback") || sentTry.contains("hardcover")
- || sentTry.contains("audio cd")
- || sentTry.contains("related searches")
- || sentTry.contains("permission is granted")
- || sentTry.contains("[edit")
- || sentTry.contains("edit categories")
- || sentTry.contains("free license")
- || sentTry.contains("permission is granted")
- || sentTry.contains("under the terms")
- || sentTry.contains("rights reserved")
- || sentTry.contains("wikipedia") || sentTry.endsWith("the")
- || sentTry.endsWith("the.") || sentTry.startsWith("below")
- || sentTry.contains("recipient of") || sentTry.contains("this message")
- || sentTry.contains("mailing list") || sentTry.contains("purchase order")
- || sentTry.contains("mon-fri") || sentTry.contains("email us") || sentTry.contains("privacy pol") || sentTry.contains("back to top")
- || sentTry.contains("click here") || sentTry.contains("for details") || sentTry.contains("assistance?") || sentTry.contains("chat live")
- || sentTry.contains("free shipping") || sentTry.contains("company info") || sentTry.contains("satisfaction g") || sentTry.contains("contact us")
- ||sentTry.startsWith( "fax") ||sentTry.startsWith( "write") || sentTry.startsWith( "email")|| sentTry.contains("conditions") || sentTry.contains("chat live")
- ||sentTry.startsWith( "we ") || sentTry.contains("the recipient") || sentTry.contains("day return") || sentTry.contains("days return")
-
- ||sentTry.startsWith( "fax") || sentTry.contains("refund it") || sentTry.contains("your money")
- ||sentTry.startsWith( "free") || sentTry.contains("purchase orders")
- ||sentTry.startsWith( "exchange it ") || sentTry.contains("return it") || sentTry.contains("credit card")
-
- || sentTry.contains("storeshop") || sentTry.startsWith( "find") || sentTry.startsWith( "shop") || sentTry.startsWith( "unlimited")
- || sentTry.contains("for a limited time") || sentTry.contains("prime members") || sentTry.contains("amazon members") || sentTry.contains("unlimited free")
- || sentTry.contains("shipping") || sentTry.startsWith( "amazon")
-// not a script text
- || sentTry.contains("document.body") || sentTry.contains(" var ") || sentTry.contains("search suggestions") ||sentTry.startsWith( "Search")
-
- )
- return null;
-
- //Millions of Amazon Prime members enjoy instant videos, free Kindle books and unlimited free two-day shipping.
-
- // count symbols indicating wrong parts of page to mine for text
- // if short and contains too many symbols indicating wrong area: reject
- String sentWrongSym = sentTry.replace(">", "&&&").replace("�", "&&&")
- .replace("|", "&&&").replace(":", "&&&").replace("/", "&&&")
- .replace("-", "&&&").replace("%", "&&&");
- if ((sentWrongSym.length() - sentTry.length()) >= 4
- && sentTry.length() < 200) // twice ot more
- return null;
-
- sent = sent.replace('[', ' ').replace(']', ' ')
- .replace("_should_find_orig_", "").replace(". .", ". ")
- .replace("amp;", " ").replace("1.", " ").replace("2.", " ")
- .replace("3.", " ").replace("4.", " ").replace("2009", "2011")
- .replace("2008", "2011").replace("2006", "2011")
- .replace("2007", "2011").replace("VIDEO:", " ").replace("Video:", " ")
- .replace("no comments", " ").replace(" ", " ").replace(" ", " ")
- .replace("(more.)", "").replace("more.", "").replace("<more>", "")
- .replace("[more]", "").replace(".,", ".").replace("<", "")
- .replace("p>", "").replace("product description", "");
-
- // TODO .replace("a.", ".");
-
- int endIndex = sent.indexOf(" posted");
- if (endIndex > 0)
- sent = sent.substring(0, endIndex);
-
- return sent;
- }
-
- public static String processSentence(String pageSentence) {
- if (pageSentence == null)
- return "";
- pageSentence = Utils.fullStripHTML(pageSentence);
- pageSentence = StringUtils.chomp(pageSentence, "..");
- pageSentence = StringUtils.chomp(pageSentence, ". .");
- pageSentence = StringUtils.chomp(pageSentence, " .");
- pageSentence = StringUtils.chomp(pageSentence, ".");
- pageSentence = StringUtils.chomp(pageSentence, "...");
- pageSentence = StringUtils.chomp(pageSentence, " ....");
- pageSentence = pageSentence.replace("::", ":").replace(".,", ". ")
- .replace("(.)", "");
-
- pageSentence = pageSentence.trim();
- pageSentence = pageSentence.replaceAll("\\s+", " "); // make single
- // spaces
- // everywhere
-
- String[] pipes = StringUtils.split(pageSentence, '|'); // removed
- // shorter part
- // of sentence
- // at the end
- // after pipe
- if (pipes.length == 2
- && ((float) pipes[0].length() / (float) pipes[1].length() > 3.0)) {
- int pipePos = pageSentence.indexOf("|");
- if (pipePos > -1)
- pageSentence = pageSentence.substring(0, pipePos - 1).trim();
-
- }
-
- if (!StringUtils.contains(pageSentence, '.')
- && !StringUtils.contains(pageSentence, '?')
- && !StringUtils.contains(pageSentence, '!'))
- pageSentence = pageSentence + ". ";
-
- pageSentence = pageSentence.replace(" .", ".").replace("..", ".").trim();
- if (!pageSentence.endsWith("."))
- pageSentence += ". ";
- return pageSentence;
- }
-
-
- public static String normalizeForSentenceSplitting(String pageContent) {
- pageContent.replace("Jan.", "January").replace("Feb.", "February")
- .replace("Mar.", "March").replace("Apr.", "April")
- .replace("Jun.", "June").replace("Jul.", "July")
- .replace("Aug.", "August").replace("Sep.", "September")
- .replace("Oct.", "October").replace("Nov.", "November")
- .replace("Dec.", "December");
-
- return pageContent;
-
- }
-}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
index 47f5fad..b027363 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
@@ -59,7 +59,7 @@
count++;
String[] pageSentsAndSnippet = formTextForReRankingFromHit(hit);
- Double score = 0.0;
+ double score = 0.0;
try {
SentencePairMatchResult match;
if (pageSentsAndSnippet!=null && pageSentsAndSnippet[0].length()>50){
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/SnippetToParagraph.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/SnippetToParagraph.java
index d01fec2..c22ba9f 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/SnippetToParagraph.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/SnippetToParagraph.java
@@ -318,15 +318,6 @@
return sents;
}
- private List<String> cleanProductFeatures(List<String> productFeaturesList) {
- List<String> results = new ArrayList<>();
- for(String feature: productFeaturesList){
- if (feature.startsWith("Unlimited Free") || feature.startsWith("View Larger") || feature.startsWith("View Larger") || feature.indexOf("shipping")>0)
- continue;
- results.add(feature);
- }
- return results;
- }
public static class TextChunk {
public TextChunk(String s, int length) {
this.text = s;
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageContentSentenceExtractor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageContentSentenceExtractor.java
index 83851c7..df0c6c0 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageContentSentenceExtractor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageContentSentenceExtractor.java
@@ -26,9 +26,6 @@
import org.apache.commons.lang.StringUtils;
public class WebPageContentSentenceExtractor extends WebPageExtractor {
-
-
-
public List<String> extractSentencesWithPotentialReviewPhrases(String url)
{
@@ -84,16 +81,6 @@
return sents;
}
- private List<String> cleanProductFeatures(List<String> productFeaturesList) {
- List<String> results = new ArrayList<>();
- for(String feature: productFeaturesList){
- if (feature.startsWith("Unlimited Free") || feature.startsWith("View Larger") || feature.startsWith("View Larger") || feature.indexOf("shipping")>0)
- continue;
- results.add(feature);
- }
- return results;
- }
-
// extracts paragraphs from web page
protected String[] cleanListOfSents(String[] longestSents)
{
@@ -123,8 +110,6 @@
return sentsClean.toArray(new String[0]);
}
-
-
private String startWithCapitalSent(String sent) {
String firstChar = sent.substring(0,1);
String remainder = sent.substring(1);
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PhraseGroupGeneralizer.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PhraseGroupGeneralizer.java
index 01fe299..1fe19d2 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PhraseGroupGeneralizer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PhraseGroupGeneralizer.java
@@ -37,7 +37,7 @@
*
* @param o1
* @param o2
- * @return List<List<ParseTreeChunk>> list of POS-words pairs for each
+ * @return {@code List<List<ParseTreeChunk>>} list of POS-words pairs for each
* resultant matched / overlapped phrase
*/
@Override
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructure.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructure.java
index d910352..dbda0d8 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructure.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructure.java
@@ -163,14 +163,18 @@
List<ParseTreeChunk> nps = new ArrayList<>(), vps = new ArrayList<>(), pps = new ArrayList<>();
for(List<ParseTreeNode> ps:phrs) {
ParseTreeChunk ch = convertNodeListIntoChunk(ps);
- String ptype = ps.get(0).getPhraseType();
LOG.debug(ps.toString());
- if (ptype.equals("NP")){
- nps.add(ch);
- } else if (ptype.equals("VP")){
- vps.add(ch);
- } else if (ptype.equals("PP")){
- pps.add(ch);
+ String ptype = ps.get(0).getPhraseType();
+ switch (ptype) {
+ case "NP":
+ nps.add(ch);
+ break;
+ case "VP":
+ vps.add(ch);
+ break;
+ case "PP":
+ pps.add(ch);
+ break;
}
}
results.add(nps); results.add(vps); results.add(pps);
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
index 33f387b..d26748d 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
@@ -146,9 +146,12 @@
boolean bAccept = true;
for (String w : qs) {
if (w.toLowerCase().equals(w)) // idf only two words then
- // has to be person name,
- // title or geolocation
+ // has to be person name,
+ // title or geolocation
+ {
bAccept = false;
+ break;
+ }
}
if (!bAccept)
continue;
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
index b846abf..abdb60c 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
@@ -77,9 +77,12 @@
boolean bAccept = true;
for (String w : qs) {
if (w.toLowerCase().equals(w)) // idf only two words then
- // has to be person name,
- // title or geolocation
+ // has to be person name,
+ // title or geolocation
+ {
bAccept = false;
+ break;
+ }
}
if (!bAccept)
continue;
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
index 209e29a..2404804 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
@@ -217,9 +217,12 @@
boolean bAccept = true;
for (String w : qs) {
if (w.toLowerCase().equals(w)) // idf only two words then
- // has to be person name,
- // title or geolocation
+ // has to be person name,
+ // title or geolocation
+ {
bAccept = false;
+ break;
+ }
}
if (!bAccept)
continue;
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
index cbd1229..4442b40 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
@@ -34,10 +34,12 @@
/**
* Results of taxonomy learning are two maps 0) For an entity like tax it gives
* all lists of associated parameters obtained from the taxonomy kernel (done
- * manually) Now, given 0, we obtain the derived list of parameters as
+ * manually).
+ * <p>
+ * Now, given 0, we obtain the derived list of parameters as
* commonalities of search results snapshots output map 1) for the entity,
- * derived list output map 2) for such manual list of words -> derived list of
- * words
+ * derived list output map 2) for such manual list of words {@literal ->} derived list of
+ * words.
*/
public class TaxonomyExtenderViaMebMining extends BingQueryRunner {
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/ValueSortMap.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/ValueSortMap.java
index fc2fdeb..3ffcb94 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/ValueSortMap.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/ValueSortMap.java
@@ -164,10 +164,10 @@
Map<K, V> hmTmpMap = new HashMap<>(iSize);
Map<K, V> hmNullValueMap = new HashMap<>();
+ keySet = inMap.keySet();
+ itKeyList = keySet.iterator(); // key iterator shared by both branches below
if (bAllDistinct) {
// There are no multiple same values of the passed map (without considering null)
- keySet = inMap.keySet();
- itKeyList = keySet.iterator();
while (itKeyList.hasNext()) {
key = itKeyList.next();
value = inMap.get(key);
@@ -192,14 +192,8 @@
sortedMap.put(key, value);
}
- if (ascendingOrder == null || ascendingOrder) {
- // Add Null Values in the last of the LinkedHasMap
- sortedMap.putAll(hmNullValueMap);
- }
} else {
// There are some multiple values (without considering null)
- keySet = inMap.keySet();
- itKeyList = keySet.iterator();
while (itKeyList.hasNext()) {
key = itKeyList.next();
value = inMap.get(key);
@@ -234,10 +228,10 @@
}
}
- if (ascendingOrder == null || ascendingOrder) {
- // Add Null Values in the last of the LinkedHasMap
- sortedMap.putAll(hmNullValueMap);
- }
+ }
+ if (ascendingOrder == null || ascendingOrder) {
+ // Add null values at the end of the LinkedHashMap
+ sortedMap.putAll(hmNullValueMap);
}
return sortedMap;
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
index 9a78670..d27f3f4 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
@@ -212,7 +212,7 @@
*
* @param sent1
* @param sent2
- * @return List<List<ParseTreeChunk>> list of POS-words pairs for each resultant matched / overlapped phrase.
+ * @return {@code List<List<ParseTreeChunk>>} list of POS-words pairs for each resultant matched / overlapped phrase.
*/
public List<List<ParseTreeChunk>> matchTwoSentencesGroupedChunksDeterministic(
List<List<ParseTreeChunk>> sent1, List<List<ParseTreeChunk>> sent2) {
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java
index 2a74997..de13ae3 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java
@@ -86,9 +86,10 @@
for (LemmaPair word : resForMinedSent1) {
if (word.getPOS().startsWith("VB") && word.getStartPos() < 1
- && word.getEndPos() < 1) {
+ && word.getEndPos() < 1) {
imperativeVerb = true;
// LOG.info("Found imperative verb=" + word);
+ break;
}
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/word2vec/W2VDistanceMeasurer.java b/opennlp-similarity/src/main/java/opennlp/tools/word2vec/W2VDistanceMeasurer.java
index a898172..69ad781 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/word2vec/W2VDistanceMeasurer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/word2vec/W2VDistanceMeasurer.java
@@ -29,14 +29,12 @@
import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.Word2Vec;
-import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.models.word2vec.wordstore.inmemory.InMemoryLookupCache;
import org.deeplearning4j.text.sentenceiterator.FileSentenceIterator;
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor;
import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
-import org.nd4j.common.primitives.Pair;
public class W2VDistanceMeasurer {
static W2VDistanceMeasurer instance;
@@ -62,8 +60,7 @@
String pathToW2V = resourceDir + "/w2v/GoogleNews-vectors-negative300.bin.gz";
File gModel = new File(pathToW2V);
try {
- Pair<InMemoryLookupTable, VocabCache> pair = WordVectorSerializer.loadTxt(Files.newInputStream(gModel.toPath()));
- vec = WordVectorSerializer.fromPair(pair);
+ vec = WordVectorSerializer.fromPair(WordVectorSerializer.loadTxt(Files.newInputStream(gModel.toPath())));
} catch (IOException e) {
System.out.println("Word2vec model is not loaded");
vec = null;
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhraseTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhraseTest.java
index 9d7eb0f..948291d 100755
--- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhraseTest.java
+++ b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhraseTest.java
@@ -154,7 +154,7 @@
FcaWriter wt = new FcaWriter();
wt.WriteStatsToTxt("merkel_stats.txt", new_cl, 0);
wt.WriteStatsToCvs("merkel_stats.csv", new_cl, ps.conceptList.size());
- wt.WriteAsCxt("merkel_lattice.cxt", new_cl);
+ wt.writeAsCxt("merkel_lattice.cxt", new_cl);
PatternStructureWriter pswt = new PatternStructureWriter();
pswt.WriteStatsToTxt("ps_res.txt", ps);
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
index 538506c..740d932 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
@@ -136,12 +136,11 @@
if (nWord.getNodeName().equals(ELEMENT_WORDFORM)) {
Element eWord = (Element) nWord;
-
+ String word = eWord.getTextContent();
+ String cmd = eWord.getAttribute(ATTRIBUTE_CMD);
+ String pos = eWord.getAttribute(ATTRIBUTE_POS);
if (eWord.getAttribute(ATTRIBUTE_CMD).equals("done")) {
// if the word is already disambiguated
- String word = eWord.getTextContent();
- String cmd = eWord.getAttribute(ATTRIBUTE_CMD);
- String pos = eWord.getAttribute(ATTRIBUTE_POS);
String lemma = eWord.getAttribute(ATTRIBUTE_LEMMA);
String wnsn = eWord.getAttribute(ATTRIBUTE_WNSN);
String lexsn = eWord.getAttribute(ATTRIBUTE_LEXSN);
@@ -149,21 +148,16 @@
Word iword = new Word(paragraphID, sentenceID, wnum,
Word.Type.WORD, word, cmd, pos, lemma, wnsn, lexsn);
isentence.addIword(iword);
- wnum++;
// System.out.println("*** " + iword.toString() + " ***");
} else {
// if the word is not disambiguated
- String word = eWord.getTextContent();
- String cmd = eWord.getAttribute(ATTRIBUTE_CMD);
- String pos = eWord.getAttribute(ATTRIBUTE_POS);
-
Word iword = new Word(paragraphID, sentenceID, wnum,
Word.Type.WORD, word, cmd, pos);
isentence.addIword(iword);
- wnum++;
}
+ wnum++;
} else if (nWord.getNodeName().equals(ELEMENT_PUNCTUATION)) {
Element eWord = (Element) nWord;