LUCENE-3030: fix last nocommits
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/blocktree_3030@1158409 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/TODO b/TODO
index d4435a5..c78aaa5 100644
--- a/TODO
+++ b/TODO
@@ -1,8 +1,4 @@
-FAIL: ant test-core -Dtestcase=TestBlockTree -Dtestmethod=testEasy -Dtests.seed=-7181425274859016595:531125480476701899
-
-TestFieldCache can spin w/ -codec Pulsing!?
-
perf tests:
- GRRR -- indexing MUCH slower now?
trunk:
@@ -16,13 +12,8 @@
- test perf on single seg index too!
- try *larger* maxItemsInBlock: could give better net perf? ie less seeking and more scanning
- - run fuzzy q under profiler
-test if cutting over prefix query to .intersect is faster
-
-"upgrade" postings reader/writer API to match block tree; then non-tree codec uses it too and we only need 1 impl of each postings
-
-what to do about short terms that "force" a block to mark itself has hasTerms!!??
+what to do about short terms that "force" a block to mark itself as hasTerms!!??
- maybe instead of "isLeafBlock" bit we encode "countUntilNextTerm"? this way, for a block that only has empty string term we can stop scanning quickly?
- maybe make a "terms block cache" that holds low-prefix LRU term blocks in ram...?
- maybe a cache holding all short-length terms will be big perf boost? it saves having to scan the low-depth blocks... or... maybe a bit noting whether this block contains any terms != empty string suffix; or, we separately hold all 'short'/'straggler' terms in a map, enabling the low-depth blocks to then 'lie' and say they have no terms?
@@ -30,11 +21,8 @@
try forcing no hasTerms if depth < 2?
-automaton q should apply maxlength test on each scan'd term
-
-intersect should use suffix ref
-
LATER:
+ - test if cutting over prefix query to .intersect is faster
- maybe blocks should NOT store sub-block pointers? it's redundant w/ the index...
- hmm: maybe switch PKLookupTask to intersect!? do we have fast string builder?
- hmm -- fix DOT when there are multiple outputs!? oh, maybe not -- it just works?
diff --git a/lucene/src/java/org/apache/lucene/index/AutomatonTermsEnum.java b/lucene/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
index 9b425fb..1a0ca33 100644
--- a/lucene/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
+++ b/lucene/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
@@ -83,6 +83,7 @@
super(tenum);
this.finite = compiled.finite;
this.runAutomaton = compiled.runAutomaton;
+ assert this.runAutomaton != null;
this.commonSuffixRef = compiled.commonSuffixRef;
this.allTransitions = compiled.sortedTransitions;
diff --git a/lucene/src/java/org/apache/lucene/index/Terms.java b/lucene/src/java/org/apache/lucene/index/Terms.java
index 7486b6e..55517e5 100644
--- a/lucene/src/java/org/apache/lucene/index/Terms.java
+++ b/lucene/src/java/org/apache/lucene/index/Terms.java
@@ -55,6 +55,9 @@
// TODO: eventually we could support seekCeil/Exact on
// the returned enum, instead of only being able to seek
// at the start
+ if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
+ throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
+ }
if (startTerm == null) {
return new AutomatonTermsEnum(iterator(), compiled);
} else {
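
For reference, a rough caller-side sketch of the flow this guard expects (the IndexReader "reader" and the field name are illustrative, not part of this change): compile the automaton first and let CompiledAutomaton pick the cheapest enum, so that only NORMAL automata ever reach intersect:

    // Hedged sketch; "reader" and "field" are assumed to exist in the caller.
    Automaton a = new RegExp("foo.*bar", RegExp.NONE).toAutomaton();
    CompiledAutomaton compiled = new CompiledAutomaton(a);   // simplify=true
    Terms terms = MultiFields.getTerms(reader, "field");
    TermsEnum te = compiled.getTermsEnum(terms);             // EMPTY, all terms, single,
                                                             // prefix, or terms.intersect(this, null)
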
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java
index 6ed3149..2955f11 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java
@@ -270,6 +270,9 @@
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
+ // If BytesRef isn't actually UTF8, or it's e.g. a
+ // prefix of UTF8 that ends mid-unicode-char, we
+ // fall back to hex:
return b.toString();
}
}
@@ -482,9 +485,9 @@
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
- // nocommit -- must check suffix ref
- // nocommit -- if A is infinte maybe fallback to ATE?
- // need to test...
+ if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
+ throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
+ }
return new IntersectEnum(compiled, startTerm);
}
@@ -1045,17 +1048,40 @@
continue nextTerm;
}
- // nocommit -- in the case where common suffix
- // len is > currentFrame.suffix we should check
- // the suffix of the currentFrame.prefix too!
- final int lenToCheck = Math.min(currentFrame.suffix, compiledAutomaton.commonSuffixRef.length);
- final byte[] b1 = currentFrame.suffixBytes;
- int pos1 = currentFrame.startBytePos + currentFrame.suffix - lenToCheck;
- final byte[] b2 = compiledAutomaton.commonSuffixRef.bytes;
- int pos2 = compiledAutomaton.commonSuffixRef.offset + compiledAutomaton.commonSuffixRef.length - lenToCheck;
- final int pos2End = pos2 + lenToCheck;
- while(pos2 < pos2End) {
- if (b1[pos1++] != b2[pos2++]) {
+ final byte[] suffixBytes = currentFrame.suffixBytes;
+ final byte[] commonSuffixBytes = compiledAutomaton.commonSuffixRef.bytes;
+
+ final int lenInPrefix = compiledAutomaton.commonSuffixRef.length - currentFrame.suffix;
+ final int suffixLenToCheck;
+ assert compiledAutomaton.commonSuffixRef.offset == 0;
+ int suffixBytesPos;
+ int commonSuffixBytesPos = 0;
+
+ if (lenInPrefix > 0) {
+ // A prefix of the common suffix overlaps with
+ // the suffix of the block prefix so we first
+ // test whether the prefix part matches:
+ final byte[] termBytes = term.bytes;
+ int termBytesPos = currentFrame.prefix - lenInPrefix;
+ assert termBytesPos >= 0;
+ final int termBytesPosEnd = currentFrame.prefix;
+ while (termBytesPos < termBytesPosEnd) {
+ if (termBytes[termBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
+ if (DEBUG) {
+ System.out.println(" skip: common suffix mismatch (in prefix)");
+ }
+ continue nextTerm;
+ }
+ }
+ suffixBytesPos = currentFrame.startBytePos;
+ } else {
+ suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - compiledAutomaton.commonSuffixRef.length;
+ }
+
+ // Test overlapping suffix part:
+ final int commonSuffixBytesPosEnd = compiledAutomaton.commonSuffixRef.length;
+ while (commonSuffixBytesPos < commonSuffixBytesPosEnd) {
+ if (suffixBytes[suffixBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
if (DEBUG) {
System.out.println(" skip: common suffix mismatch");
}
@@ -1064,6 +1090,12 @@
}
}
+ // TODO: maybe we should do the same linear test
+ // that AutomatonTermsEnum does, so that if we
+ // reach a part of the automaton where .* is
+ // "temporarily" accepted, we just blindly .next()
+ // until the limit
+
// See if the term prefix matches the automaton:
int state = currentFrame.state;
for (int idx=0;idx<currentFrame.suffix;idx++) {
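
The check above splits the common-suffix comparison across two byte sources: when the automaton's common suffix is longer than this entry's suffix bytes, its leading bytes are matched against the tail of the shared block prefix (held in term.bytes) and the remainder against the entry's own suffix bytes. A standalone restatement of that index arithmetic, with hypothetical array names (prefixBytes/suffixBytes/commonSuffix are not names from this patch):

    // Does prefix+suffix end with commonSuffix?  Hedged sketch only.
    static boolean endsWithCommonSuffix(byte[] prefixBytes, int prefixLen,
                                        byte[] suffixBytes, int suffixStart, int suffixLen,
                                        byte[] commonSuffix) {
      final int lenInPrefix = commonSuffix.length - suffixLen;
      int cs = 0;
      if (lenInPrefix > 0) {
        // Leading part of the common suffix falls inside the shared prefix:
        for (int p = prefixLen - lenInPrefix; p < prefixLen; p++) {
          if (prefixBytes[p] != commonSuffix[cs++]) {
            return false;
          }
        }
        // The rest must match the entire entry suffix:
        for (int s = suffixStart; cs < commonSuffix.length; s++) {
          if (suffixBytes[s] != commonSuffix[cs++]) {
            return false;
          }
        }
      } else {
        // Common suffix fits entirely inside the entry's suffix bytes:
        for (int s = suffixStart + suffixLen - commonSuffix.length; cs < commonSuffix.length; s++) {
          if (suffixBytes[s] != commonSuffix[cs++]) {
            return false;
          }
        }
      }
      return true;
    }
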
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java
index e685135..fddb0b4 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java
@@ -423,8 +423,9 @@
// First pass: count up how many items fall under
// each unique label after the prefix.
- // nocommit: this is wasteful since the builder had
- // already done this but we discarded it...
+ // TODO: this is wasteful since the builder had
+ // already done this (partitioned these sub-terms
+ // according to their leading prefix byte)
final List<Object> slice = pending.subList(pending.size()-count, pending.size());
int lastLabel = -1;
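
The "first pass" above just tallies, for each distinct byte immediately following the shared prefix, how many pending entries carry it. A hedged restatement over plain byte arrays (pendingSuffixes and prefixLength are hypothetical stand-ins; the real code walks a mix of pending terms and sub-blocks):

    // Count how many pending entries fall under each label byte after the prefix.
    int[] countPerLabel = new int[256];
    for (byte[] termBytes : pendingSuffixes) {
      int label = termBytes[prefixLength] & 0xFF;   // first byte after the common prefix
      countPerLabel[label]++;
    }
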
diff --git a/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java b/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java
index 82c450a..bfca59c 100644
--- a/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java
@@ -51,20 +51,10 @@
public class AutomatonQuery extends MultiTermQuery {
/** the automaton to match index terms against */
protected final Automaton automaton;
+ protected final CompiledAutomaton compiled;
/** term containing the field, and possibly some pattern structure */
protected final Term term;
- /**
- * abstraction for returning a termsenum:
- * in the ctor the query computes one of these, the actual
- * implementation depends upon the automaton's structure.
- */
- private abstract class TermsEnumFactory {
- protected abstract TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException;
- }
-
- private final TermsEnumFactory factory;
-
/**
* Create a new AutomatonQuery from an {@link Automaton}.
*
@@ -77,72 +67,12 @@
super(term.field());
this.term = term;
this.automaton = automaton;
-
- if (BasicOperations.isEmpty(automaton)) {
- // matches nothing
- factory = new TermsEnumFactory() {
- @Override
- protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return TermsEnum.EMPTY;
- }
- };
- } else if (BasicOperations.isTotal(automaton)) {
- // matches all possible strings
- factory = new TermsEnumFactory() {
- @Override
- protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return terms.iterator();
- }
- };
- } else {
- final String singleton;
- final String commonPrefix;
-
- if (automaton.getSingleton() == null) {
- commonPrefix = SpecialOperations.getCommonPrefix(automaton);
- if (commonPrefix.length() > 0 && BasicOperations.sameLanguage(automaton, BasicAutomata.makeString(commonPrefix))) {
- singleton = commonPrefix;
- } else {
- singleton = null;
- }
- } else {
- commonPrefix = null;
- singleton = automaton.getSingleton();
- }
-
- if (singleton != null) {
- // matches a fixed string in singleton or expanded representation
- factory = new TermsEnumFactory() {
- @Override
- protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return new SingleTermsEnum(terms.iterator(), new Term(field, singleton));
- }
- };
- } else if (BasicOperations.sameLanguage(automaton, BasicOperations.concatenate(
- BasicAutomata.makeString(commonPrefix), BasicAutomata.makeAnyString()))) {
- // matches a constant prefix
- factory = new TermsEnumFactory() {
- @Override
- protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return new PrefixTermsEnum(terms.iterator(), new Term(field, commonPrefix));
- }
- };
- } else {
- final CompiledAutomaton compiled =
- new CompiledAutomaton(automaton, SpecialOperations.isFinite(automaton));
- factory = new TermsEnumFactory() {
- @Override
- protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return terms.intersect(compiled, null);
- }
- };
- }
- }
+ this.compiled = new CompiledAutomaton(automaton);
}
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return factory.getTermsEnum(terms, atts);
+ return compiled.getTermsEnum(terms);
}
@Override
diff --git a/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java b/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
index a7ea7d8..fa9f077 100644
--- a/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
@@ -138,7 +138,7 @@
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
if (!termLongEnough) { // can only match if it's exact
- return new SingleTermsEnum(terms.iterator(), term);
+ return new SingleTermsEnum(terms.iterator(), term.bytes());
}
return new FuzzyTermsEnum(terms, atts, getTerm(), minimumSimilarity, prefixLength);
}
diff --git a/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
index 03a9ffc..64c057c 100644
--- a/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
+++ b/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
@@ -172,7 +172,7 @@
UnicodeUtil.newString(termText, 0, realPrefixLength));
a = BasicOperations.concatenate(prefix, a);
}
- runAutomata.add(new CompiledAutomaton(a, true));
+ runAutomata.add(new CompiledAutomaton(a, true, false));
}
}
return runAutomata;
diff --git a/lucene/src/java/org/apache/lucene/search/PrefixQuery.java b/lucene/src/java/org/apache/lucene/search/PrefixQuery.java
index 27a89bb..42cdebf 100644
--- a/lucene/src/java/org/apache/lucene/search/PrefixQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/PrefixQuery.java
@@ -51,7 +51,7 @@
// no prefix -- match all terms for this field:
return tenum;
}
- return new PrefixTermsEnum(tenum, prefix);
+ return new PrefixTermsEnum(tenum, prefix.bytes());
}
/** Prints a user-readable version of this query. */
diff --git a/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java b/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java
index a5f8fcf..c5bdd08 100644
--- a/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java
+++ b/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java
@@ -34,9 +34,9 @@
private final BytesRef prefixRef;
- public PrefixTermsEnum(TermsEnum tenum, Term prefix) throws IOException {
+ public PrefixTermsEnum(TermsEnum tenum, BytesRef prefixText) throws IOException {
super(tenum);
- setInitialSeekTerm(prefixRef = prefix.bytes());
+ setInitialSeekTerm(this.prefixRef = prefixText);
}
@Override
diff --git a/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java b/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java
index 55fadef..c2726a2 100644
--- a/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java
+++ b/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java
@@ -39,10 +39,10 @@
* After calling the constructor the enumeration is already pointing to the term,
* if it exists.
*/
- public SingleTermsEnum(TermsEnum tenum, Term singleTerm) throws IOException {
+ public SingleTermsEnum(TermsEnum tenum, BytesRef termText) throws IOException {
super(tenum);
- singleRef = singleTerm.bytes();
- setInitialSeekTerm(singleRef);
+ singleRef = termText;
+ setInitialSeekTerm(termText);
}
@Override
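
With both constructors taking raw bytes, callers that already hold a BytesRef (for example CompiledAutomaton.getTermsEnum, further down) no longer need to wrap it in a Term. A hedged usage sketch (the Terms instance "terms" is assumed to exist in the caller):

    TermsEnum single = new SingleTermsEnum(terms.iterator(), new BytesRef("exact-term"));
    TermsEnum prefixed = new PrefixTermsEnum(terms.iterator(), new BytesRef("pre"));
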
diff --git a/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java b/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
index 4e633a7..0a75f37 100644
--- a/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
+++ b/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
@@ -89,13 +89,13 @@
// for assert:
private BytesRef lastTerm;
- private boolean compareToLastTerm(BytesRef t) {
+ private boolean compareToLastTerm(BytesRef t) throws IOException {
if (lastTerm == null && t != null) {
lastTerm = new BytesRef(t);
} else if (t == null) {
lastTerm = null;
} else {
- assert lastTerm.compareTo(t) < 0: "lastTerm=" + lastTerm + " t=" + t;
+ assert termsEnum.getComparator().compare(lastTerm, t) < 0: "lastTerm=" + lastTerm + " t=" + t;
lastTerm.copy(t);
}
return true;
@@ -106,10 +106,7 @@
final float boost = boostAtt.getBoost();
// make sure within a single seg we always collect
- // terms in order -- nocommit dangerous, if codec
- // uses its own termComp? should we nuke that
- // ability? terms must always be "unsigned byte[]
- // order"?
+ // terms in order
assert compareToLastTerm(bytes);
//System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
@@ -160,11 +157,7 @@
final Q q = getTopLevelQuery();
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
- // nocommit -- how come I see 2 seekExacts for the same
- // term in a row in the terms dict, around here...?
- // could this be if we are using a non-intersect() capable terms impl (e.g. preflex)
- // that its the off-by-one regarding 'start term' ? are we seeking backwards!!!!!!
for (final ScoreTerm st : scoreTerms) {
final Term term = new Term(query.field, st.bytes);
assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq() + " term=" + term;
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java b/lucene/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
index 7ef39ba..6dc9105 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
@@ -17,10 +17,17 @@
* limitations under the License.
*/
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.codecs.BlockTreeTermsWriter;
+import org.apache.lucene.search.PrefixTermsEnum;
+import org.apache.lucene.search.SingleTermsEnum;
+import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
/**
@@ -31,29 +38,106 @@
* @lucene.experimental
*/
public class CompiledAutomaton {
+ public enum AUTOMATON_TYPE {NONE, ALL, SINGLE, PREFIX, NORMAL};
+ public final AUTOMATON_TYPE type;
+
+ // For PREFIX, this is the prefix term; for SINGLE this is
+ // the singleton term:
+ public final BytesRef term;
+
+ // NOTE: the next 4 members are only non-null if type ==
+ // NORMAL:
public final ByteRunAutomaton runAutomaton;
// TODO: would be nice if these sortedTransitions had "int
// to;" instead of "State to;" somehow:
public final Transition[][] sortedTransitions;
public final BytesRef commonSuffixRef;
- public final boolean finite;
+ public final Boolean finite;
- // nocommit -- move pulling of a TermsEnum into here, so
- // that we can optimize for cases where a simpler enume
- // (prefix enum, all terms, no terms, etc.) can be used
-
- public CompiledAutomaton(Automaton automaton, boolean finite) {
+ public CompiledAutomaton(Automaton automaton) {
+ this(automaton, null, true);
+ }
+
+ public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify) {
+
+ if (simplify) {
+ // Test whether the automaton is a "simple" form and
+ // if so, don't create a runAutomaton. Note that on a
+ // large automaton these tests could be costly:
+ if (BasicOperations.isEmpty(automaton)) {
+ // matches nothing
+ type = AUTOMATON_TYPE.NONE;
+ term = null;
+ commonSuffixRef = null;
+ runAutomaton = null;
+ sortedTransitions = null;
+ this.finite = null;
+ return;
+ } else if (BasicOperations.isTotal(automaton)) {
+ // matches all possible strings
+ type = AUTOMATON_TYPE.ALL;
+ term = null;
+ commonSuffixRef = null;
+ runAutomaton = null;
+ sortedTransitions = null;
+ this.finite = null;
+ return;
+ } else {
+ final String commonPrefix;
+ final String singleton;
+ if (automaton.getSingleton() == null) {
+ commonPrefix = SpecialOperations.getCommonPrefix(automaton);
+ if (commonPrefix.length() > 0 && BasicOperations.sameLanguage(automaton, BasicAutomata.makeString(commonPrefix))) {
+ singleton = commonPrefix;
+ } else {
+ singleton = null;
+ }
+ } else {
+ commonPrefix = null;
+ singleton = automaton.getSingleton();
+ }
+
+ if (singleton != null) {
+ // matches a fixed string in singleton or expanded
+ // representation
+ type = AUTOMATON_TYPE.SINGLE;
+ term = new BytesRef(singleton);
+ commonSuffixRef = null;
+ runAutomaton = null;
+ sortedTransitions = null;
+ this.finite = null;
+ return;
+ } else if (BasicOperations.sameLanguage(automaton, BasicOperations.concatenate(
+ BasicAutomata.makeString(commonPrefix), BasicAutomata.makeAnyString()))) {
+ // matches a constant prefix
+ type = AUTOMATON_TYPE.PREFIX;
+ term = new BytesRef(commonPrefix);
+ commonSuffixRef = null;
+ runAutomaton = null;
+ sortedTransitions = null;
+ this.finite = null;
+ return;
+ }
+ }
+ }
+
+ type = AUTOMATON_TYPE.NORMAL;
+ term = null;
+ if (finite == null) {
+ this.finite = SpecialOperations.isFinite(automaton);
+ } else {
+ this.finite = finite;
+ }
Automaton utf8 = new UTF32ToUTF8().convert(automaton);
- runAutomaton = new ByteRunAutomaton(utf8, true);
- sortedTransitions = utf8.getSortedTransitions();
- this.finite = finite;
- if (finite) {
+ if (this.finite) {
commonSuffixRef = null;
} else {
commonSuffixRef = SpecialOperations.getCommonSuffixBytesRef(utf8);
}
+ runAutomaton = new ByteRunAutomaton(utf8, true);
+ sortedTransitions = utf8.getSortedTransitions();
}
-
+
private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
private BytesRef addTail(int state, BytesRef term, int idx, int leadLabel) {
@@ -109,6 +193,29 @@
}
}
+ // TODO: should this take startTerm too? This way
+ // Terms.intersect could forward to this method if type !=
+ // NORMAL:
+ public TermsEnum getTermsEnum(Terms terms) throws IOException {
+ switch(type) {
+ case NONE:
+ return TermsEnum.EMPTY;
+ case ALL:
+ return terms.iterator();
+ case SINGLE:
+ return new SingleTermsEnum(terms.iterator(), term);
+ case PREFIX:
+ // TODO: this is very likely faster than .intersect,
+ // but we should test and maybe cutover
+ return new PrefixTermsEnum(terms.iterator(), term);
+ case NORMAL:
+ return terms.intersect(this, null);
+ default:
+ // unreachable
+ throw new RuntimeException("unhandled case");
+ }
+ }
+
/** Finds largest term accepted by this Automaton, that's
* <= the provided input term. The result is placed in
* output; it's fine for output and input to point to
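
Taken together, the constructor classifies the automaton once and getTermsEnum dispatches on that type. A hedged sketch of the expected classification for a few representative automata (the asserts are illustrative and not part of this patch):

    assert new CompiledAutomaton(BasicAutomata.makeEmpty()).type == CompiledAutomaton.AUTOMATON_TYPE.NONE;
    assert new CompiledAutomaton(BasicAutomata.makeAnyString()).type == CompiledAutomaton.AUTOMATON_TYPE.ALL;
    assert new CompiledAutomaton(BasicAutomata.makeString("foo")).type == CompiledAutomaton.AUTOMATON_TYPE.SINGLE;
    assert new CompiledAutomaton(BasicOperations.concatenate(
               BasicAutomata.makeString("foo"), BasicAutomata.makeAnyString())).type == CompiledAutomaton.AUTOMATON_TYPE.PREFIX;
    // Anything that survives simplification is NORMAL and goes through Terms.intersect:
    assert new CompiledAutomaton(new RegExp("fo[ox]*", RegExp.NONE).toAutomaton()).type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL;
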
diff --git a/lucene/src/java/org/apache/lucene/util/fst/Util.java b/lucene/src/java/org/apache/lucene/util/fst/Util.java
index 5aa638f..4b4e7ad 100644
--- a/lucene/src/java/org/apache/lucene/util/fst/Util.java
+++ b/lucene/src/java/org/apache/lucene/util/fst/Util.java
@@ -318,7 +318,6 @@
}
if (!fst.targetHasArcs(arc) && arc.isFinal() && arc.nextFinalOutput != NO_OUTPUT) {
- // nocommit -- this is broken again?
// Tricky special case: sometimes, due to
// pruning, the builder can [sillily] produce
// an FST with an arc into the final end state
diff --git a/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java
index 8dba83d..1861e7d 100644
--- a/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java
+++ b/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java
@@ -270,7 +270,7 @@
}
a = DaciukMihovAutomatonBuilder.build(sortedAcceptTerms);
}
- final CompiledAutomaton c = new CompiledAutomaton(a, true);
+ final CompiledAutomaton c = new CompiledAutomaton(a, true, false);
final BytesRef[] acceptTermsArray = new BytesRef[acceptTerms.size()];
final Set<BytesRef> acceptTermsSet = new HashSet<BytesRef>();
diff --git a/lucene/src/test/org/apache/lucene/index/TestTermsEnum2.java b/lucene/src/test/org/apache/lucene/index/TestTermsEnum2.java
index 6612360..cb60e0c 100644
--- a/lucene/src/test/org/apache/lucene/index/TestTermsEnum2.java
+++ b/lucene/src/test/org/apache/lucene/index/TestTermsEnum2.java
@@ -165,7 +165,7 @@
for (int i = 0; i < numIterations; i++) {
String reg = AutomatonTestUtil.randomRegexp(random);
Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
- CompiledAutomaton ca = new CompiledAutomaton(automaton, SpecialOperations.isFinite(automaton));
+ CompiledAutomaton ca = new CompiledAutomaton(automaton, SpecialOperations.isFinite(automaton), false);
TermsEnum te = MultiFields.getTerms(reader, "field").intersect(ca, null);
Automaton expected = BasicOperations.intersection(termsAutomaton, automaton);
TreeSet<BytesRef> found = new TreeSet<BytesRef>();
diff --git a/lucene/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java b/lucene/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
index 5fe994c..f346d4c 100644
--- a/lucene/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
+++ b/lucene/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
@@ -36,7 +36,7 @@
}
Automaton a = BasicOperations.union(as);
a.determinize();
- return new CompiledAutomaton(a, true);
+ return new CompiledAutomaton(a, true, false);
}
private void testFloor(CompiledAutomaton c, String input, String expected) {
diff --git a/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
index c6cb6ce..1ba73c3 100644
--- a/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
+++ b/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
@@ -1707,6 +1707,7 @@
StringWriter w = new StringWriter();
Util.toDot(fst, w, false, false);
w.close();
+ //System.out.println(w.toString());
assertTrue(w.toString().indexOf("label=\"t/[7]\"") != -1);
}
@@ -1721,6 +1722,7 @@
//Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
Util.toDot(fst, w, false, false);
w.close();
+ //System.out.println(w.toString());
assertTrue(w.toString().indexOf("6 [shape=doublecircle") != -1);
}