// blob: a95f0952495c34eec98ba1fbefa68587db75aa37 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Transition;
@LuceneTestCase.SuppressCodecs("SimpleText")
public class TestTermAutomatonQuery extends LuceneTestCase {
// "comes * sun"
/**
 * Queries for "comes", followed by any single token, followed by "sun". Of the two indexed
 * documents only the first is expected to hit.
 */
public void testBasic1() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // The first text matches; the second has two tokens between "comes" and "sun" and does not.
  String[] texts = {"here comes the sun", "here comes the other sun"};
  for (String text : texts) {
    Document document = new Document();
    document.add(newTextField("field", text, Field.Store.NO));
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int afterComes = query.createState();
  query.addTransition(start, afterComes, "comes");
  int afterAny = query.createState();
  query.addAnyTransition(afterComes, afterAny);
  int accept = query.createState();
  query.addTransition(afterAny, accept, "sun");
  query.setAccept(accept, true);
  query.finish();

  TopDocs hits = searcher.search(query, 1);
  assertEquals(1, hits.totalHits.value);

  writer.close();
  reader.close();
  directory.close();
}
// "comes * (sun|moon)"
/**
 * Same shape as the basic test, but the final transition accepts either "sun" or "moon", so
 * both indexed documents are expected to hit.
 */
public void testBasicSynonym() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  String[] texts = {"here comes the sun", "here comes the moon"};
  for (String text : texts) {
    Document document = new Document();
    document.add(newTextField("field", text, Field.Store.NO));
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int afterComes = query.createState();
  query.addTransition(start, afterComes, "comes");
  int afterAny = query.createState();
  query.addAnyTransition(afterComes, afterAny);
  int accept = query.createState();
  // Two labels on the same edge act as alternatives for the last token.
  query.addTransition(afterAny, accept, "sun");
  query.addTransition(afterAny, accept, "moon");
  query.setAccept(accept, true);
  query.finish();

  long totalHits = searcher.search(query, 1).totalHits.value;
  assertEquals(2, totalHits);

  writer.close();
  reader.close();
  directory.close();
}
// "comes sun" or "comes * sun"
/**
 * "sun" may follow "comes" directly or after exactly one arbitrary token. Two of the three
 * indexed documents are expected to hit.
 */
public void testBasicSlop() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  String[] texts = {
    "here comes the sun",
    "here comes sun",
    "here comes the other sun"
  };
  for (String text : texts) {
    Document document = new Document();
    document.add(newTextField("field", text, Field.Store.NO));
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int afterComes = query.createState();
  query.addTransition(start, afterComes, "comes");
  int afterAny = query.createState();
  query.addAnyTransition(afterComes, afterAny);
  int accept = query.createState();
  // "sun" is reachable both right after "comes" and after the any-transition.
  query.addTransition(afterComes, accept, "sun");
  query.addTransition(afterAny, accept, "sun");
  query.setAccept(accept, true);
  query.finish();

  TopDocs hits = searcher.search(query, 1);
  assertEquals(2, hits.totalHits.value);

  writer.close();
  reader.close();
  directory.close();
}
// Verify posLength is "respected" at query time: index "speedy wifi
// network", then search on "fast wi fi network" using a (simulated!)
// query-time synonym filter that adds "wifi" over "wi fi" with posLength=2.
// To make this real we need a version of TokenStreamToAutomaton (TS2A)
// that operates on whole terms, not characters.
public void testPosLengthAtQueryTimeMock() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // The first four texts are expected to match; the last one ("slow ...") is not.
  String[] texts = {
    "speedy wifi network",
    "speedy wi fi network",
    "fast wifi network",
    "fast wi fi network",
    "slow wi fi network"
  };
  for (String text : texts) {
    Document document = new Document();
    document.add(newTextField("field", text, Field.Store.NO));
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  // Hand-built automaton: (fast|speedy) (wifi | wi fi) network
  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int afterAdjective = query.createState();
  int afterWi = query.createState();
  int afterWifi = query.createState();
  int accept = query.createState();

  query.addTransition(start, afterAdjective, "fast");
  query.addTransition(start, afterAdjective, "speedy");
  query.addTransition(afterAdjective, afterWi, "wi");
  // "wifi" skips the intermediate state, standing in for a token with posLength=2.
  query.addTransition(afterAdjective, afterWifi, "wifi");
  query.addTransition(afterWi, afterWifi, "fi");
  query.addTransition(afterWifi, accept, "network");
  query.setAccept(accept, true);
  query.finish();

  // Debugging aid: System.out.println("DOT:\n" + query.toDot());
  TopDocs hits = searcher.search(query, 1);
  assertEquals(4, hits.totalHits.value);

  writer.close();
  reader.close();
  directory.close();
}
/**
 * Builds the query from a canned token stream in which "wifi" is stacked on "wi" with position
 * length 2, then expects four of the five indexed documents to hit.
 */
public void testPosLengthAtQueryTimeTrueish() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // The first four texts are expected to match; the last one ("slow ...") is not.
  String[] texts = {
    "speedy wifi network",
    "speedy wi fi network",
    "fast wifi network",
    "fast wi fi network",
    "slow wi fi network"
  };
  for (String text : texts) {
    Document document = new Document();
    document.add(newTextField("field", text, Field.Store.NO));
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  // Arguments are (term, position increment, position length).
  Token[] tokens = {
    token("fast", 1, 1),
    token("speedy", 0, 1),
    token("wi", 1, 1),
    token("wifi", 0, 2),
    token("fi", 1, 1),
    token("network", 1, 1)
  };
  TokenStream stream = new CannedTokenStream(tokens);
  TermAutomatonQuery query = new TokenStreamToTermAutomatonQuery().toQuery("field", stream);

  // Debugging aid: System.out.println("DOT: " + query.toDot());
  long totalHits = searcher.search(query, 1).totalHits.value;
  assertEquals(4, totalHits);

  writer.close();
  reader.close();
  directory.close();
}
/**
 * Indexes the "sun" and "moon" documents with a commit in between, then expects a query for
 * "comes * (sun|moon)" to hit both.
 */
public void testSegsMissingTerms() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  Document sunDoc = new Document();
  sunDoc.add(newTextField("field", "here comes the sun", Field.Store.NO));
  writer.addDocument(sunDoc);

  // NOTE(review): presumably the commit is meant to put the two documents in different
  // segments, so each segment lacks one of the queried terms -- confirm.
  writer.commit();

  Document moonDoc = new Document();
  moonDoc.add(newTextField("field", "here comes the moon", Field.Store.NO));
  writer.addDocument(moonDoc);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int afterComes = query.createState();
  query.addTransition(start, afterComes, "comes");
  int afterAny = query.createState();
  query.addAnyTransition(afterComes, afterAny);
  int accept = query.createState();
  query.addTransition(afterAny, accept, "sun");
  query.addTransition(afterAny, accept, "moon");
  query.setAccept(accept, true);
  query.finish();

  TopDocs hits = searcher.search(query, 1);
  assertEquals(2, hits.totalHits.value);

  writer.close();
  reader.close();
  directory.close();
}
/** Expects {@code finish()} to throw when the first transition is an any-transition. */
public void testInvalidLeadWithAny() throws Exception {
  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int middle = query.createState();
  int accept = query.createState();
  query.setAccept(accept, true);

  // any -> "b"
  query.addAnyTransition(start, middle);
  query.addTransition(middle, accept, "b");

  expectThrows(IllegalStateException.class, query::finish);
}
/** Expects {@code finish()} to throw when the last transition is an any-transition. */
public void testInvalidTrailWithAny() throws Exception {
  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int middle = query.createState();
  int accept = query.createState();
  query.setAccept(accept, true);

  // "b" -> any
  query.addTransition(start, middle, "b");
  query.addAnyTransition(middle, accept);

  expectThrows(IllegalStateException.class, query::finish);
}
/**
 * Builds the query from a canned token stream containing a "*" token and expects three of the
 * four indexed documents to hit.
 */
public void testAnyFromTokenStream() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // The first three texts are expected to match; the last one should not.
  String[] texts = {
    "here comes the sun",
    "here comes the moon",
    "here comes sun",
    "here comes the other sun"
  };
  for (String text : texts) {
    Document document = new Document();
    document.add(newTextField("field", text, Field.Store.NO));
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  // Arguments are (term, position increment, position length).
  Token[] tokens = {
    token("comes", 1, 1),
    token("comes", 0, 2),
    token("*", 1, 1),
    token("sun", 1, 1),
    token("moon", 0, 1)
  };
  TokenStream stream = new CannedTokenStream(tokens);
  TermAutomatonQuery query = new TokenStreamToTermAutomatonQuery().toQuery("field", stream);

  // Debugging aid: System.out.println("DOT: " + query.toDot());
  long totalHits = searcher.search(query, 1).totalHits.value;
  assertEquals(3, totalHits);

  writer.close();
  reader.close();
  directory.close();
}
/**
 * Creates a {@link Token} for {@code term} with offsets {@code 0..term.length()} and the given
 * position increment and position length.
 */
private static Token token(String term, int posInc, int posLength) {
  final int endOffset = term.length();
  final Token result = new Token(term, 0, endOffset);
  result.setPositionIncrement(posInc);
  result.setPositionLength(posLength);
  return result;
}
/**
 * Token filter that, after about one in ten input tokens, emits one extra token stacked on the
 * same position (position increment 0). The extra token's text is the preceding term with one
 * random letter from {@code 'a'..'c'} appended.
 */
private static class RandomSynonymFilter extends TokenFilter {
  /** True when the next call to {@link #incrementToken()} must emit the stacked token. */
  private boolean synNext;

  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final PositionIncrementAttribute posIncAtt =
      addAttribute(PositionIncrementAttribute.class);

  public RandomSynonymFilter(TokenFilter in) {
    super(in);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (synNext) {
      // Re-emit the attributes of the token that was just returned, at the same position.
      AttributeSource.State state = captureState();
      clearAttributes();
      restoreState(state);
      posIncAtt.setPositionIncrement(0);
      // The sum must be parenthesized before the cast: an unparenthesized "(char) 97 + n" is an
      // int addition, which would append the digits "97".."99" rather than a letter.
      termAtt.append((char) (97 + random().nextInt(3)));
      synNext = false;
      return true;
    }

    if (input.incrementToken()) {
      // Schedule a stacked token after this one with probability 1/10.
      if (random().nextInt(10) == 8) {
        synNext = true;
      }
      return true;
    } else {
      return false;
    }
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    synNext = false;
  }
}
/**
 * Randomized comparison: builds a {@link TermAutomatonQuery} from a union of random strings over
 * the letters a, b and c (with '*' standing for an any-transition) and checks that it matches
 * the same documents as a disjunction of equivalent {@link MultiPhraseQuery} clauses.
 */
public void testRandom() throws Exception {
  int numDocs = atLeast(50);
  Directory directory = newDirectory();

  // Adds occasional random synonyms:
  Analyzer analyzer = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true, 100);
      tokenizer.setEnableChecks(true);
      TokenFilter stopFilter = new MockTokenFilter(tokenizer, MockTokenFilter.EMPTY_STOPSET);
      TokenFilter synonymFilter = new RandomSynonymFilter(stopFilter);
      return new TokenStreamComponents(tokenizer, synonymFilter);
    }
  };

  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  for (int docIndex = 0; docIndex < numDocs; docIndex++) {
    int numTokens = atLeast(10);
    StringBuilder text = new StringBuilder();
    for (int tokenIndex = 0; tokenIndex < numTokens; tokenIndex++) {
      text.append(' ');
      text.append((char) (97 + random().nextInt(3)));
    }
    String contents = text.toString();

    Document document = new Document();
    document.add(newTextField("field", contents, Field.Store.NO));
    document.add(new StoredField("id", Integer.toString(docIndex)));
    if (VERBOSE) {
      System.out.println(" doc " + docIndex + " -> " + contents);
    }
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(reader);

  // Used to match ANY using MultiPhraseQuery:
  Term[] allTerms = {
    new Term("field", "a"),
    new Term("field", "b"),
    new Term("field", "c")
  };

  int numIters = atLeast(1000);
  for (int iter = 0; iter < numIters; iter++) {
    // Build the (finite, no any transitions) TermAutomatonQuery and
    // also the "equivalent" BooleanQuery and make sure they match the
    // same docs:
    BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
    int count = TestUtil.nextInt(random(), 1, 5);
    Set<BytesRef> strings = new HashSet<>();
    for (int i = 0; i < count; i++) {
      StringBuilder pattern = new StringBuilder();
      MultiPhraseQuery.Builder phrase = new MultiPhraseQuery.Builder();
      int numTokens = TestUtil.nextInt(random(), 1, 5);
      for (int j = 0; j < numTokens; j++) {
        // A wildcard is only considered for interior positions; the short-circuit keeps the
        // random draw from happening at the first and last position.
        if (j > 0 && j < numTokens - 1 && random().nextInt(5) == 3) {
          pattern.append('*');
          phrase.add(allTerms);
        } else {
          char letter = (char) (97 + random().nextInt(3));
          pattern.append(letter);
          phrase.add(new Term("field", String.valueOf(letter)));
        }
      }
      disjunction.add(phrase.build(), BooleanClause.Occur.SHOULD);
      strings.add(new BytesRef(pattern.toString()));
    }

    List<BytesRef> stringsList = new ArrayList<>(strings);
    Collections.sort(stringsList);
    Automaton automaton = Automata.makeStringUnion(stringsList);

    // Translate automaton to query:
    TermAutomatonQuery query = new TermAutomatonQuery("field");
    int numStates = automaton.getNumStates();
    for (int state = 0; state < numStates; state++) {
      query.createState();
      query.setAccept(state, automaton.isAccept(state));
    }

    Transition transition = new Transition();
    for (int state = 0; state < numStates; state++) {
      int transitionCount = automaton.initTransition(state, transition);
      for (int k = 0; k < transitionCount; k++) {
        automaton.getNextTransition(transition);
        for (int label = transition.min; label <= transition.max; label++) {
          char symbol = (char) label;
          if (symbol == '*') {
            query.addAnyTransition(transition.source, transition.dest);
          } else {
            query.addTransition(transition.source, transition.dest, String.valueOf(symbol));
          }
        }
      }
    }
    query.finish();

    if (VERBOSE) {
      System.out.println("TEST: iter=" + iter);
      for (BytesRef string : stringsList) {
        System.out.println(" string: " + string.utf8ToString());
      }
      System.out.println(query.toDot());
    }

    Query actualQuery = query;
    Query expectedQuery = disjunction.build();
    if (random().nextInt(5) == 1) {
      if (VERBOSE) {
        System.out.println(" use random filter");
      }
      // Apply the same random filter to both sides so they stay comparable.
      long filterSeed = random().nextLong();
      float filterDensity = random().nextFloat();
      RandomQuery filter = new RandomQuery(filterSeed, filterDensity);
      actualQuery = new BooleanQuery.Builder()
          .add(actualQuery, Occur.MUST)
          .add(filter, Occur.FILTER)
          .build();
      expectedQuery = new BooleanQuery.Builder()
          .add(expectedQuery, Occur.MUST)
          .add(filter, Occur.FILTER)
          .build();
    }

    TopDocs actualHits = searcher.search(actualQuery, numDocs);
    TopDocs expectedHits = searcher.search(expectedQuery, numDocs);
    Set<String> actualIds = toDocIDs(searcher, actualHits);
    Set<String> expectedIds = toDocIDs(searcher, expectedHits);

    try {
      assertEquals(expectedHits.totalHits.value, actualHits.totalHits.value);
      assertEquals(expectedIds, actualIds);
    } catch (AssertionError ae) {
      // Print which ids differ before rethrowing the original failure.
      System.out.println("FAILED:");
      for (String id : actualIds) {
        if (!expectedIds.contains(id)) {
          System.out.println(String.format(Locale.ROOT, " id=%3s matched but should not have", id));
        }
      }
      for (String id : expectedIds) {
        if (!actualIds.contains(id)) {
          System.out.println(String.format(Locale.ROOT, " id=%3s did not match but should have", id));
        }
      }
      throw ae;
    }
  }

  IOUtils.close(reader, directory, analyzer);
}
/** Returns the set of stored "id" field values for every hit in {@code hits}. */
private Set<String> toDocIDs(IndexSearcher s, TopDocs hits) throws IOException {
  Set<String> ids = new HashSet<>();
  ScoreDoc[] scoreDocs = hits.scoreDocs;
  for (int i = 0; i < scoreDocs.length; i++) {
    Document stored = s.doc(scoreDocs[i].doc);
    ids.add(stored.get("id"));
  }
  return ids;
}
/**
 * Constant-score query that accepts a pseudo-random subset of the documents of each leaf reader.
 * The subset is derived from {@code seed} and the leaf's {@code docBase}, so the same instance
 * picks the same documents every time it is scored against the same leaf.
 */
private static class RandomQuery extends Query {
  private final long seed;
  private final float density;

  /**
   * @param seed base seed for the per-leaf random generator
   * @param density fraction of documents to accept; should be 0.0 ... 1.0
   */
  public RandomQuery(long seed, float density) {
    this.seed = seed;
    this.density = density;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {
      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        int maxDoc = context.reader().maxDoc();
        FixedBitSet bits = new FixedBitSet(maxDoc);
        // Mix in docBase so different leaves draw different sequences from the same seed.
        Random random = new Random(seed ^ context.docBase);
        for (int docID = 0; docID < maxDoc; docID++) {
          if (random.nextFloat() <= density) {
            bits.set(docID);
          }
        }
        return new ConstantScoreScorer(this, score(), scoreMode, new BitSetIterator(bits, bits.approximateCardinality()));
      }

      @Override
      public boolean isCacheable(LeafReaderContext ctx) {
        return false;
      }
    };
  }

  @Override
  public void visit(QueryVisitor visitor) {
    // Intentionally empty: nothing is reported to the visitor.
  }

  @Override
  public String toString(String field) {
    return "RandomFilter(seed=" + seed + ",density=" + density + ")";
  }

  @Override
  public boolean equals(Object other) {
    return sameClassAs(other) &&
        equalsTo(getClass().cast(other));
  }

  private boolean equalsTo(RandomQuery other) {
    // Float.compare agrees with the boxed Float hashing used in hashCode(); "==" would not for
    // 0.0f vs -0.0f or for NaN.
    return seed == other.seed &&
        Float.compare(density, other.density) == 0;
  }

  @Override
  public int hashCode() {
    return classHash() ^ Objects.hash(seed, density);
  }
}
/**
 * See if we can create a TAQ with cycles: "here" followed by any number of "comes here"
 * repetitions. One of the two indexed documents is expected to hit.
 */
public void testWithCycles1() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  String[] texts = {"here comes here comes", "comes foo"};
  for (String text : texts) {
    Document document = new Document();
    document.add(newTextField("field", text, Field.Store.NO));
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int afterHere = query.createState();
  int afterComes = query.createState();
  query.addTransition(start, afterHere, "here");
  query.addTransition(afterHere, afterComes, "comes");
  // Back edge that closes the cycle.
  query.addTransition(afterComes, afterHere, "here");
  query.setAccept(afterHere, true);
  query.finish();

  TopDocs hits = searcher.search(query, 1);
  assertEquals(1, hits.totalHits.value);

  writer.close();
  reader.close();
  directory.close();
}
/**
 * See if we can create a TAQ with cycles: "here comes", then one or more "sun" via a self-loop,
 * then "kaoma". One of the two indexed documents is expected to hit.
 */
public void testWithCycles2() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  String[] texts = {"here comes kaoma", "here comes sun sun sun sun kaoma"};
  for (String text : texts) {
    Document document = new Document();
    document.add(newTextField("field", text, Field.Store.NO));
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int afterHere = query.createState();
  query.addTransition(start, afterHere, "here");
  int afterComes = query.createState();
  query.addTransition(afterHere, afterComes, "comes");
  int afterSun = query.createState();
  query.addTransition(afterComes, afterSun, "sun");
  // Self-loop: further "sun" tokens stay in the same state.
  query.addTransition(afterSun, afterSun, "sun");
  int accept = query.createState();
  query.addTransition(afterSun, accept, "kaoma");
  query.setAccept(accept, true);
  query.finish();

  long totalHits = searcher.search(query, 1).totalHits.value;
  assertEquals(1, totalHits);

  writer.close();
  reader.close();
  directory.close();
}
/** A single-token query for a term that is not in the index is expected to hit nothing. */
public void testTermDoesNotExist() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  Document document = new Document();
  document.add(newTextField("field", "x y z", Field.Store.NO));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  // "a" does not occur in the indexed text.
  Token[] tokens = {token("a", 1, 1)};
  TokenStream stream = new CannedTokenStream(tokens);
  TermAutomatonQuery query = new TokenStreamToTermAutomatonQuery().toQuery("field", stream);

  // Debugging aid: System.out.println("DOT: " + query.toDot());
  TopDocs hits = searcher.search(query, 1);
  assertEquals(0, hits.totalHits.value);

  writer.close();
  reader.close();
  directory.close();
}
/**
 * A two-token query whose first term is not in the index is expected to hit nothing, even
 * though its second term is indexed.
 */
public void testOneTermDoesNotExist() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  Document document = new Document();
  document.add(newTextField("field", "x y z", Field.Store.NO));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  // "a" is absent from the indexed text; "x" is present.
  Token[] tokens = {token("a", 1, 1), token("x", 1, 1)};
  TokenStream stream = new CannedTokenStream(tokens);
  TermAutomatonQuery query = new TokenStreamToTermAutomatonQuery().toQuery("field", stream);

  // Debugging aid: System.out.println("DOT: " + query.toDot());
  TopDocs hits = searcher.search(query, 1);
  assertEquals(0, hits.totalHits.value);

  IOUtils.close(writer, reader, directory);
}
/** Expects {@code finish()} to throw when the initial state is itself accepting. */
public void testEmptyString() throws Exception {
  TermAutomatonQuery query = new TermAutomatonQuery("field");
  // The only state is both initial and accepting.
  query.setAccept(query.createState(), true);
  expectThrows(IllegalStateException.class, () -> query.finish());
}
/**
 * A query with a single state that is never marked accepting is expected to rewrite to a
 * {@link MatchNoDocsQuery}.
 */
public void testRewriteNoMatch() throws Exception {
  TermAutomatonQuery q = new TermAutomatonQuery("field");
  // Only the initial state is created; its id is not needed afterwards.
  q.createState();
  q.finish();

  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newTextField("field", "x y z", Field.Store.NO));
  w.addDocument(doc);
  IndexReader r = w.getReader();

  assertTrue(q.rewrite(r) instanceof MatchNoDocsQuery);

  IOUtils.close(w, r, dir);
}
/** A single-transition query is expected to rewrite to a {@link TermQuery} on that term. */
public void testRewriteTerm() throws Exception {
  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int accept = query.createState();
  query.addTransition(start, accept, "foo");
  query.setAccept(accept, true);
  query.finish();

  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  Document document = new Document();
  document.add(newTextField("field", "x y z", Field.Store.NO));
  writer.addDocument(document);
  IndexReader reader = writer.getReader();

  Query rewritten = query.rewrite(reader);
  assertTrue(rewritten instanceof TermQuery);
  TermQuery termQuery = (TermQuery) rewritten;
  assertEquals(new Term("field", "foo"), termQuery.getTerm());

  IOUtils.close(writer, reader, directory);
}
/**
 * A linear two-term query is expected to rewrite to a {@link PhraseQuery} with the terms at
 * positions 0 and 1.
 */
public void testRewriteSimplePhrase() throws Exception {
  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int afterFoo = query.createState();
  int accept = query.createState();
  query.addTransition(start, afterFoo, "foo");
  query.addTransition(afterFoo, accept, "bar");
  query.setAccept(accept, true);
  query.finish();

  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  Document document = new Document();
  document.add(newTextField("field", "x y z", Field.Store.NO));
  writer.addDocument(document);
  IndexReader reader = writer.getReader();

  Query rewritten = query.rewrite(reader);
  assertTrue(rewritten instanceof PhraseQuery);
  PhraseQuery phrase = (PhraseQuery) rewritten;

  Term[] terms = phrase.getTerms();
  assertEquals(new Term("field", "foo"), terms[0]);
  assertEquals(new Term("field", "bar"), terms[1]);

  int[] positions = phrase.getPositions();
  assertEquals(0, positions[0]);
  assertEquals(1, positions[1]);

  IOUtils.close(writer, reader, directory);
}
/**
 * A linear query with an any-transition between two terms is expected to rewrite to a
 * {@link PhraseQuery} whose second term sits at position 2.
 */
public void testRewritePhraseWithAny() throws Exception {
  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int afterFoo = query.createState();
  int afterAny = query.createState();
  int accept = query.createState();
  query.addTransition(start, afterFoo, "foo");
  query.addAnyTransition(afterFoo, afterAny);
  query.addTransition(afterAny, accept, "bar");
  query.setAccept(accept, true);
  query.finish();

  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  Document document = new Document();
  document.add(newTextField("field", "x y z", Field.Store.NO));
  writer.addDocument(document);
  IndexReader reader = writer.getReader();

  Query rewritten = query.rewrite(reader);
  assertTrue(rewritten instanceof PhraseQuery);
  PhraseQuery phrase = (PhraseQuery) rewritten;

  Term[] terms = phrase.getTerms();
  assertEquals(new Term("field", "foo"), terms[0]);
  assertEquals(new Term("field", "bar"), terms[1]);

  // The any-transition shows up as a gap in the positions.
  int[] positions = phrase.getPositions();
  assertEquals(0, positions[0]);
  assertEquals(2, positions[1]);

  IOUtils.close(writer, reader, directory);
}
/**
 * A query with two alternative terms on its only edge is expected to rewrite to a
 * {@link MultiPhraseQuery} with one position holding both terms.
 */
public void testRewriteSimpleMultiPhrase() throws Exception {
  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int accept = query.createState();
  query.addTransition(start, accept, "foo");
  query.addTransition(start, accept, "bar");
  query.setAccept(accept, true);
  query.finish();

  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  Document document = new Document();
  document.add(newTextField("field", "x y z", Field.Store.NO));
  writer.addDocument(document);
  IndexReader reader = writer.getReader();

  Query rewritten = query.rewrite(reader);
  assertTrue(rewritten instanceof MultiPhraseQuery);
  MultiPhraseQuery multiPhrase = (MultiPhraseQuery) rewritten;

  Term[][] termArrays = multiPhrase.getTermArrays();
  assertEquals(1, termArrays.length);
  Term[] alternatives = termArrays[0];
  assertEquals(2, alternatives.length);
  assertEquals(new Term("field", "foo"), alternatives[0]);
  assertEquals(new Term("field", "bar"), alternatives[1]);

  int[] positions = multiPhrase.getPositions();
  assertEquals(1, positions.length);
  assertEquals(0, positions[0]);

  IOUtils.close(writer, reader, directory);
}
/**
 * Two alternative terms, an any-transition, then one term: expected to rewrite to a
 * {@link MultiPhraseQuery} with term groups at positions 0 and 2.
 */
public void testRewriteMultiPhraseWithAny() throws Exception {
  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int afterFirst = query.createState();
  int afterAny = query.createState();
  int accept = query.createState();
  query.addTransition(start, afterFirst, "foo");
  query.addTransition(start, afterFirst, "bar");
  query.addAnyTransition(afterFirst, afterAny);
  query.addTransition(afterAny, accept, "baz");
  query.setAccept(accept, true);
  query.finish();

  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  Document document = new Document();
  document.add(newTextField("field", "x y z", Field.Store.NO));
  writer.addDocument(document);
  IndexReader reader = writer.getReader();

  Query rewritten = query.rewrite(reader);
  assertTrue(rewritten instanceof MultiPhraseQuery);
  MultiPhraseQuery multiPhrase = (MultiPhraseQuery) rewritten;

  Term[][] termArrays = multiPhrase.getTermArrays();
  assertEquals(2, termArrays.length);

  Term[] firstGroup = termArrays[0];
  assertEquals(2, firstGroup.length);
  assertEquals(new Term("field", "foo"), firstGroup[0]);
  assertEquals(new Term("field", "bar"), firstGroup[1]);

  Term[] secondGroup = termArrays[1];
  assertEquals(1, secondGroup.length);
  assertEquals(new Term("field", "baz"), secondGroup[0]);

  // The any-transition shows up as a gap in the positions.
  int[] positions = multiPhrase.getPositions();
  assertEquals(2, positions.length);
  assertEquals(0, positions[0]);
  assertEquals(2, positions[1]);

  IOUtils.close(writer, reader, directory);
}
// We query with sun|moon, but "moon" does not occur in the index; the single
// indexed document is still expected to hit via "sun".
public void testOneTermMissing() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  Document document = new Document();
  document.add(newTextField("field", "here comes the sun", Field.Store.NO));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  TermAutomatonQuery query = new TermAutomatonQuery("field");
  int start = query.createState();
  int afterComes = query.createState();
  query.addTransition(start, afterComes, "comes");
  int afterAny = query.createState();
  query.addAnyTransition(afterComes, afterAny);
  int accept = query.createState();
  query.addTransition(afterAny, accept, "sun");
  query.addTransition(afterAny, accept, "moon");
  query.setAccept(accept, true);
  query.finish();

  TopDocs hits = searcher.search(query, 1);
  assertEquals(1, hits.totalHits.value);

  writer.close();
  reader.close();
  directory.close();
}
// We query with sun|moon against "bogusfield", a field for which nothing was
// indexed; no hits are expected.
public void testFieldMissing() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  Document document = new Document();
  document.add(newTextField("field", "here comes the sun", Field.Store.NO));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  TermAutomatonQuery query = new TermAutomatonQuery("bogusfield");
  int start = query.createState();
  int afterComes = query.createState();
  query.addTransition(start, afterComes, "comes");
  int afterAny = query.createState();
  query.addAnyTransition(afterComes, afterAny);
  int accept = query.createState();
  query.addTransition(afterAny, accept, "sun");
  query.addTransition(afterAny, accept, "moon");
  query.setAccept(accept, true);
  query.finish();

  TopDocs hits = searcher.search(query, 1);
  assertEquals(0, hits.totalHits.value);

  writer.close();
  reader.close();
  directory.close();
}
}