blob: d9c6032aa91e183efd00fbd5ae5479e065ea3260 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SingleTermsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Rethrow;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.automaton.Operations;
import static org.apache.lucene.util.automaton.Operations.DEFAULT_DETERMINIZE_WORK_LIMIT;
public class TestAutomatonQuery extends LuceneTestCase {
private Directory directory;
private IndexReader reader;
private IndexSearcher searcher;
private static final String FN = "field";
@Override
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
Document doc = new Document();
Field titleField = newTextField("title", "some title", Field.Store.NO);
Field field = newTextField(FN, "this is document one 2345", Field.Store.NO);
Field footerField = newTextField("footer", "a footer", Field.Store.NO);
doc.add(titleField);
doc.add(field);
doc.add(footerField);
writer.addDocument(doc);
field.setStringValue("some text from doc two a short piece 5678.91");
writer.addDocument(doc);
field.setStringValue("doc three has some different stuff"
+ " with numbers 1234 5678.9 and letter b");
writer.addDocument(doc);
reader = writer.getReader();
searcher = newSearcher(reader);
writer.close();
}
@Override
public void tearDown() throws Exception {
reader.close();
directory.close();
super.tearDown();
}
private Term newTerm(String value) {
return new Term(FN, value);
}
private long automatonQueryNrHits(AutomatonQuery query) throws IOException {
if (VERBOSE) {
System.out.println("TEST: run aq=" + query);
}
return searcher.search(query, 5).totalHits.value;
}
private void assertAutomatonHits(int expected, Automaton automaton)
throws IOException {
AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);
query.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
assertEquals(expected, automatonQueryNrHits(query));
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
assertEquals(expected, automatonQueryNrHits(query));
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
assertEquals(expected, automatonQueryNrHits(query));
}
/**
* Test some very simple automata.
*/
public void testAutomata() throws IOException {
assertAutomatonHits(0, Automata.makeEmpty());
assertAutomatonHits(0, Automata.makeEmptyString());
assertAutomatonHits(2, Automata.makeAnyChar());
assertAutomatonHits(3, Automata.makeAnyString());
assertAutomatonHits(2, Automata.makeString("doc"));
assertAutomatonHits(1, Automata.makeChar('a'));
assertAutomatonHits(2, Automata.makeCharRange('a', 'b'));
assertAutomatonHits(2, Automata.makeDecimalInterval(1233, 2346, 0));
assertAutomatonHits(1, Automata.makeDecimalInterval(0, 2000, 0));
assertAutomatonHits(2, Operations.union(Automata.makeChar('a'),
Automata.makeChar('b')));
assertAutomatonHits(0, Operations.intersection(Automata
.makeChar('a'), Automata.makeChar('b')));
assertAutomatonHits(1, Operations.minus(Automata.makeCharRange('a', 'b'),
Automata.makeChar('a'), DEFAULT_DETERMINIZE_WORK_LIMIT));
}
/**
* Test that a nondeterministic automaton works correctly. (It should will be
* determinized)
*/
public void testNFA() throws IOException {
// accept this or three, the union is an NFA (two transitions for 't' from
// initial state)
Automaton nfa = Operations.union(Automata.makeString("this"),
Automata.makeString("three"));
assertAutomatonHits(2, nfa);
}
public void testEquals() {
AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"), Automata
.makeString("foobar"));
// reference to a1
AutomatonQuery a2 = a1;
// same as a1 (accepts the same language, same term)
AutomatonQuery a3 = new AutomatonQuery(newTerm("foobar"),
Operations.concatenate(
Automata.makeString("foo"),
Automata.makeString("bar")));
// different than a1 (same term, but different language)
AutomatonQuery a4 = new AutomatonQuery(newTerm("foobar"),
Automata.makeString("different"));
// different than a1 (different term, same language)
AutomatonQuery a5 = new AutomatonQuery(newTerm("blah"),
Automata.makeString("foobar"));
assertEquals(a1.hashCode(), a2.hashCode());
assertEquals(a1, a2);
assertEquals(a1.hashCode(), a3.hashCode());
assertEquals(a1, a3);
// different class
AutomatonQuery w1 = new WildcardQuery(newTerm("foobar"));
// different class
AutomatonQuery w2 = new RegexpQuery(newTerm("foobar"));
assertFalse(a1.equals(w1));
assertFalse(a1.equals(w2));
assertFalse(w1.equals(w2));
assertFalse(a1.equals(a4));
assertFalse(a1.equals(a5));
assertFalse(a1.equals(null));
}
/**
* Test that rewriting to a single term works as expected, preserves
* MultiTermQuery semantics.
*/
public void testRewriteSingleTerm() throws IOException {
AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), Automata.makeString("piece"));
Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), FN);
assertTrue(aq.getTermsEnum(terms) instanceof SingleTermsEnum);
assertEquals(1, automatonQueryNrHits(aq));
}
/**
* Test that rewriting to a prefix query works as expected, preserves
* MultiTermQuery semantics.
*/
public void testRewritePrefix() throws IOException {
Automaton pfx = Automata.makeString("do");
Automaton prefixAutomaton = Operations.concatenate(pfx, Automata.makeAnyString());
AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
assertEquals(3, automatonQueryNrHits(aq));
}
/**
* Test handling of the empty language
*/
public void testEmptyOptimization() throws IOException {
AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), Automata.makeEmpty());
// not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
// instanceof EmptyTermEnum);
Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), FN);
assertSame(TermsEnum.EMPTY, aq.getTermsEnum(terms));
assertEquals(0, automatonQueryNrHits(aq));
}
public void testHashCodeWithThreads() throws Exception {
final AutomatonQuery queries[] = new AutomatonQuery[atLeast(100)];
for (int i = 0; i < queries.length; i++) {
queries[i] = new AutomatonQuery(new Term("bogus", "bogus"), AutomatonTestUtil.randomAutomaton(random()), Integer.MAX_VALUE);
}
final CountDownLatch startingGun = new CountDownLatch(1);
int numThreads = TestUtil.nextInt(random(), 2, 5);
Thread[] threads = new Thread[numThreads];
for (int threadID = 0; threadID < numThreads; threadID++) {
Thread thread = new Thread() {
@Override
public void run() {
try {
startingGun.await();
for (int i = 0; i < queries.length; i++) {
queries[i].hashCode();
}
} catch (Exception e) {
Rethrow.rethrow(e);
}
}
};
threads[threadID] = thread;
thread.start();
}
startingGun.countDown();
for (Thread thread : threads) {
thread.join();
}
}
public void testBiggishAutomaton() {
List<BytesRef> terms = new ArrayList<>();
while (terms.size() < 3000) {
terms.add(new BytesRef(TestUtil.randomUnicodeString(random())));
}
Collections.sort(terms);
new AutomatonQuery(new Term("foo", "bar"), Automata.makeStringUnion(terms), Integer.MAX_VALUE);
}
}