OPENNLP-723 - fixed cky method (removed the redundant pcfg parameter; it now uses the grammar instance's own non-terminal symbols), minor formatting fixes
diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java
index 0fdf326..0d7d4a8 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java
@@ -19,7 +19,6 @@
package org.apache.opennlp.utils.anomalydetection;
import java.math.BigDecimal;
-
import org.apache.opennlp.utils.TrainingExample;
import org.apache.opennlp.utils.TrainingSet;
@@ -33,7 +32,6 @@
*
* @param inputs the {@link org.apache.opennlp.utils.TrainingSet} to fit
* @return the <code>double[]</code> containing the Mu parameters for each feature
- * @throws Exception
*/
public static double[] fitMus(TrainingSet inputs) {
assert inputs != null && inputs.size() > 0 : "empty dataset";
@@ -54,7 +52,6 @@
* @param mus mean parameters
* @param inputs the {@link TrainingSet} to fit
* @return the <code>double[]</code> containing the standard deviations
- * @throws Exception
*/
public static double[] fitSigmas(double[] mus, TrainingSet inputs) {
assert inputs != null && inputs.size() > 0 : "empty dataset";
@@ -84,10 +81,10 @@
/**
* calculate the probability of a certain input in a certain training set
*
- * @param x the input
- * @param set the training set
+ * @param x the input
+ * @param set the training set
* @return the probability of the given input
- * @throws Exception
+ * @throws Exception
*/
public static double getGaussianProbability(TrainingExample x, TrainingSet set) throws Exception {
double[] mus = fitMus(set);
@@ -96,7 +93,7 @@
}
private static double calculateGaussianProbability(TrainingExample x, double[] mus,
- double[] sigmas) {
+ double[] sigmas) {
assert mus.length == sigmas.length : "parameters not aligned";
BigDecimal px = new BigDecimal(1d);
for (int i = 0; i < mus.length; i++) {
diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
index 7d3b33e..7cca8ee 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
@@ -25,44 +25,44 @@
*/
public class CFGBuilder {
- private Collection<String> nonTerminalSymbols;
- private Collection<String> terminalSymbols;
- private Collection<Rule> rules;
- private String startSymbol;
- private boolean randomExpansion;
+ private Collection<String> nonTerminalSymbols;
+ private Collection<String> terminalSymbols;
+ private Collection<Rule> rules;
+ private String startSymbol;
+ private boolean randomExpansion;
- public static CFGBuilder createCFG() {
- return new CFGBuilder();
- }
+ public static CFGBuilder createCFG() {
+ return new CFGBuilder();
+ }
- public CFGBuilder withTerminals(Collection<String> terminalSymbols) {
- this.terminalSymbols = terminalSymbols;
- return this;
- }
+ public CFGBuilder withTerminals(Collection<String> terminalSymbols) {
+ this.terminalSymbols = terminalSymbols;
+ return this;
+ }
- public CFGBuilder withNonTerminals(Collection<String> nonTerminalSymbols) {
- this.nonTerminalSymbols = nonTerminalSymbols;
- return this;
- }
+ public CFGBuilder withNonTerminals(Collection<String> nonTerminalSymbols) {
+ this.nonTerminalSymbols = nonTerminalSymbols;
+ return this;
+ }
- public CFGBuilder withRules(Collection<Rule> rules) {
- this.rules = rules;
- return this;
- }
+ public CFGBuilder withRules(Collection<Rule> rules) {
+ this.rules = rules;
+ return this;
+ }
- public CFGBuilder withStartSymbol(String startSymbol) {
- this.startSymbol = startSymbol;
- return this;
- }
+ public CFGBuilder withStartSymbol(String startSymbol) {
+ this.startSymbol = startSymbol;
+ return this;
+ }
- public CFGBuilder withRandomExpansion(boolean randomExpansion) {
- this.randomExpansion = randomExpansion;
- return this;
- }
+ public CFGBuilder withRandomExpansion(boolean randomExpansion) {
+ this.randomExpansion = randomExpansion;
+ return this;
+ }
- public ContextFreeGrammar build() {
- assert nonTerminalSymbols != null && terminalSymbols != null && rules != null && startSymbol != null :
- "missing definitions { V : " + nonTerminalSymbols + ", ∑ : " + terminalSymbols + ", R : " + rules + ", S : " + startSymbol + "}";
- return new ContextFreeGrammar(nonTerminalSymbols, terminalSymbols, rules, startSymbol, randomExpansion);
- }
+ public ContextFreeGrammar build() {
+ assert nonTerminalSymbols != null && terminalSymbols != null && rules != null && startSymbol != null :
+ "missing definitions { V : " + nonTerminalSymbols + ", ∑ : " + terminalSymbols + ", R : " + rules + ", S : " + startSymbol + "}";
+ return new ContextFreeGrammar(nonTerminalSymbols, terminalSymbols, rules, startSymbol, randomExpansion);
+ }
}
diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
index f3ae0d0..c3419ed 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
@@ -22,7 +22,6 @@
import java.util.Collection;
import java.util.LinkedList;
import java.util.Random;
-import java.util.Set;
/**
* A context free grammar
diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
index 12f58c9..63d2760 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
@@ -143,7 +143,7 @@
return backPointer;
}
- public BackPointer cky(List<String> sentence, ProbabilisticContextFreeGrammar pcfg) {
+ public BackPointer cky(List<String> sentence) {
BackPointer backPointer = null;
int n = sentence.size();
@@ -151,7 +151,7 @@
for (int i = 0; i < n - l; i++) {
int j = i + l;
double max = 0;
- for (String x : pcfg.getNonTerminalSymbols()) {
+ for (String x : getNonTerminalSymbols()) {
for (Rule r : getRulesForNonTerminal(x)) {
for (int s = i; s < j - 1; s++) {
double q = q(r);
diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
index 88eb721..a7371bc 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
@@ -32,7 +32,7 @@
for (T[] sentence : sentences) {
int idx0 = contains(sentence, x0);
if (idx0 >= 0) {
- if (idx0 + 2 < sentence.length && x1.equals(sentence[idx0+1]) && x2.equals(sentence[idx0+2])) {
+ if (idx0 + 2 < sentence.length && x1.equals(sentence[idx0 + 1]) && x2.equals(sentence[idx0 + 2])) {
count++;
}
}
@@ -42,7 +42,7 @@
private static <T> int contains(T[] sentence, T word) {
for (int i = 0; i < sentence.length; i++) {
- if (word.equals(sentence[i])){
+ if (word.equals(sentence[i])) {
return i;
}
}
@@ -61,8 +61,7 @@
if (foundPreceding && sequentWord.equals(w)) {
foundPreceding = false;
result++;
- }
- else
+ } else
foundPreceding = false;
}
}
@@ -85,11 +84,11 @@
}
public static <T> Double calculateBigramMLProbability(T sequentWord, T precedingWord, Collection<T[]> set) {
- return count(sequentWord, precedingWord, set)/ count(precedingWord, set);
+ return count(sequentWord, precedingWord, set) / count(precedingWord, set);
}
public static <T> Double calculateTrigramMLProbability(T x0, T x1, T x2, Collection<T[]> sentences) {
- return count(x0, x1, x2, sentences)/ count(x1, x0, sentences);
+ return count(x0, x1, x2, sentences) / count(x1, x0, sentences);
}
public static Double calculateBigramPriorSmoothingProbability(String sequentWord, String precedingWord, Collection<String[]> set, Double k) {
@@ -99,17 +98,17 @@
public static <T> Double calculateUnigramMLProbability(T word, Collection<T[]> set) {
double vocSize = 0d;
for (T[] s : set) {
- vocSize+= s.length;
+ vocSize += s.length;
}
return count(word, set) / vocSize;
}
public static <T> Double calculateLinearInterpolationProbability(T x0, T x1, T x2, Collection<T[]> sentences,
- Double lambda1, Double lambda2, Double lambda3) {
+ Double lambda1, Double lambda2, Double lambda3) {
assert lambda1 + lambda2 + lambda3 == 1 : "lambdas sum should be equals to 1";
assert lambda1 > 0 && lambda2 > 0 && lambda3 > 0 : "lambdas should all be greater than 0";
- return lambda1 * calculateTrigramMLProbability(x0, x1, x2, sentences) +
+ return lambda1 * calculateTrigramMLProbability(x0, x1, x2, sentences) +
lambda2 * calculateBigramMLProbability(x2, x1, sentences) +
lambda3 * calculateUnigramMLProbability(x2, sentences);
@@ -117,7 +116,7 @@
private static <T> Collection<T> flatSet(Collection<T[]> set) {
Collection<T> flatSet = new HashSet<T>();
- for (T[] sentence : set){
+ for (T[] sentence : set) {
flatSet.addAll(Arrays.asList(sentence));
}
return flatSet;
@@ -127,7 +126,7 @@
Double missingMass = 0d;
Double countWord = count(x1, set);
for (T word : flatSet(set)) {
- missingMass += (count(word, x1, set) - discount)/ countWord;
+ missingMass += (count(word, x1, set) - discount) / countWord;
}
return 1 - missingMass;
}
diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java
index 9fccc61..5de955f 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java
@@ -19,7 +19,6 @@
package org.apache.opennlp.utils.regression;
import java.util.Arrays;
-
import org.apache.opennlp.utils.TrainingSet;
/**
diff --git a/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java b/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
index 365cb83..6a6714a 100644
--- a/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
+++ b/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
@@ -18,11 +18,10 @@
*/
package org.apache.opennlp.utils.anomalydetection;
-import org.junit.Test;
-
import org.apache.opennlp.utils.TestUtils;
import org.apache.opennlp.utils.TrainingExample;
import org.apache.opennlp.utils.TrainingSet;
+import org.junit.Test;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
diff --git a/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammarTest.java b/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammarTest.java
index 8a991e0..1101cd9 100644
--- a/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammarTest.java
+++ b/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammarTest.java
@@ -138,7 +138,7 @@
sentence.add("the");
sentence.add("man");
- ProbabilisticContextFreeGrammar.BackPointer backPointer = pcfg.cky(sentence, pcfg);
+ ProbabilisticContextFreeGrammar.BackPointer backPointer = pcfg.cky(sentence);
check(pcfg, backPointer, sentence);
// fixed sentence two
@@ -148,14 +148,14 @@
sentence.add("works");
sentence.add("nicely");
- backPointer = pcfg.cky(sentence, pcfg);
+ backPointer = pcfg.cky(sentence);
check(pcfg, backPointer, sentence);
// random sentence generated by the grammar
String[] expansion = pcfg.leftMostDerivation("S");
sentence = Arrays.asList(expansion);
- backPointer = pcfg.cky(sentence, pcfg);
+ backPointer = pcfg.cky(sentence);
check(pcfg, backPointer, sentence);
}