LUCENE-4956: move all the list creation out of CompoundNounAnalyzer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4956@1536233 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java b/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java
index ba90f15..8bd240d 100644
--- a/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java
+++ b/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java
@@ -30,7 +30,6 @@
import org.apache.lucene.analysis.ko.dic.CompoundEntry;
import org.apache.lucene.analysis.ko.dic.DictionaryUtil;
import org.apache.lucene.analysis.ko.dic.HanjaMapper;
-import org.apache.lucene.analysis.ko.dic.WordEntry;
import org.apache.lucene.analysis.ko.morph.AnalysisOutput;
import org.apache.lucene.analysis.ko.morph.CompoundNounAnalyzer;
import org.apache.lucene.analysis.ko.morph.MorphAnalyzer;
@@ -340,7 +339,7 @@
// 추출된 명사가 복합명사인 경우 분리한다.
for(int i=0;i<maxCandidate;i++) {
- List<CompoundEntry> results = cnAnalyzer.analyze(candiList.get(i).toString());
+ CompoundEntry results[] = cnAnalyzer.analyze(candiList.get(i).toString());
int pos = 0;
int offset = 0;
diff --git a/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java b/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java
index a4247b3..c547cc8 100644
--- a/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java
+++ b/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java
@@ -190,7 +190,8 @@
// nocommit
public void setCNounList(CompoundEntry[] cnoun) {
- compound.clear();
+ // NOTE: allocate a fresh list instead of calling clear(): something still holds on to the 'previous' cnoun list after MorphAnalyzer.confirmCnoun sets it to something new.
+ compound = new ArrayList<CompoundEntry>();
addCNouns(cnoun);
}
diff --git a/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java b/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
index a1dc380..ceceb02 100644
--- a/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
+++ b/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
@@ -17,10 +17,6 @@
* limitations under the License.
*/
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
import org.apache.lucene.analysis.ko.dic.CompoundEntry;
import org.apache.lucene.analysis.ko.dic.DictionaryUtil;
import org.apache.lucene.analysis.ko.dic.WordEntry;
@@ -36,27 +32,16 @@
}
/** Returns decompounded list for word, or null */
- public List<CompoundEntry> analyze(String input) {
+ public CompoundEntry[] analyze(String input) {
if (input.length() < 3 || input.length() > 20) {
// ignore less than 3 letters or more than 20 letters.
return null;
}
WordEntry entry = DictionaryUtil.getCompoundNoun(input);
if (entry != null) {
- // nocommit
- ArrayList<CompoundEntry> l = new ArrayList<CompoundEntry>();
- l.addAll(Arrays.asList(entry.getCompounds()));
- return l;
+ return entry.getCompounds();
} else {
- CompoundEntry[] compounds = analyze(input, true);
- if (compounds == null) {
- return null;
- } else {
- // nocommit
- ArrayList<CompoundEntry> l = new ArrayList<CompoundEntry>();
- l.addAll(Arrays.asList(compounds));
- return l;
- }
+ return analyze(input, true);
}
}
diff --git a/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java b/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
index 07fadba..99b6672 100644
--- a/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
+++ b/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
@@ -374,11 +374,11 @@
if(o.getStem().length()<3) return false;
- List<CompoundEntry> results = cnAnalyzer.analyze(o.getStem());
+ CompoundEntry results[] = cnAnalyzer.analyze(o.getStem());
boolean success = false;
- if(results != null && results.size()>1) {
+ if(results != null && results.length > 1) {
o.setCNounList(results);
success = true;
int maxWordLen = 0;
diff --git a/lucene/analysis/arirang/src/test/org/apache/lucene/analysis/ko/TestCompoundSegment.java b/lucene/analysis/arirang/src/test/org/apache/lucene/analysis/ko/TestCompoundSegment.java
index 1a0a97a..f9954b9 100644
--- a/lucene/analysis/arirang/src/test/org/apache/lucene/analysis/ko/TestCompoundSegment.java
+++ b/lucene/analysis/arirang/src/test/org/apache/lucene/analysis/ko/TestCompoundSegment.java
@@ -52,7 +52,7 @@
*/
private String[] splitByUnitWord(CompoundNounAnalyzer analyzer, String input) throws Exception {
- List<CompoundEntry> results = analyzer.analyze(input);
+ CompoundEntry results[] = analyzer.analyze(input);
List<String> nounList = new ArrayList<String>();
for(CompoundEntry entry : results) {