blob: 892508b73b5fa320375c3f767e828f8de332e3e0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
public class TestTermRangeQuery extends LuceneTestCase {
// Number of documents inserted so far via insertDoc(); used to build each document's "id" value.
private int docCount = 0;
// Directory holding the test index; created in setUp() and closed in tearDown().
private Directory dir;
/** Runs the base-class set-up, then creates the directory the tests index into. */
@Override
public void setUp() throws Exception {
  super.setUp();
  this.dir = newDirectory();
}
/**
 * Closes the test directory, then runs the base-class tear-down.
 *
 * <p>The base-class call sits in a {@code finally} block so that it still runs when closing
 * the directory throws.
 */
@Override
public void tearDown() throws Exception {
  try {
    dir.close();
  } finally {
    super.tearDown();
  }
}
/**
 * Checks a range over "content" from "A" to "C" with both bounds excluded: exactly one hit is
 * expected for an index of A,B,C,D, for a re-created index of A,B,D, and after appending C.
 */
public void testExclusive() throws Exception {
  Query query = TermRangeQuery.newStringRange("content", "A", "C", false, false);

  initializeIndex(new String[] {"A", "B", "C", "D"});
  // try-with-resources closes each reader even when the search or the assertion fails
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    assertEquals("A,B,C,D, only B in range", 1, hits.length);
  }

  // re-create the index without "C"
  initializeIndex(new String[] {"A", "B", "D"});
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    assertEquals("A,B,D, only B in range", 1, hits.length);
  }

  // append "C" to the existing index; the upper bound is exclusive, so it must not match
  addDoc("C");
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    assertEquals("C added, still only B in range", 1, hits.length);
  }
}
/**
 * Checks a range over "content" from "A" to "C" with both bounds included: three hits are
 * expected for an index of A,B,C,D, two for a re-created index of A,B,D, and three again after
 * appending C.
 */
public void testInclusive() throws Exception {
  Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true);

  initializeIndex(new String[]{"A", "B", "C", "D"});
  // try-with-resources closes each reader even when the search or the assertion fails
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
  }

  // re-create the index without "C"
  initializeIndex(new String[]{"A", "B", "D"});
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    assertEquals("A,B,D - A and B in range", 2, hits.length);
  }

  // append "C" to the existing index; the upper bound is inclusive, so it must match
  addDoc("C");
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    assertEquals("C added - A, B, C in range", 3, hits.length);
  }
}
/**
 * Checks ranges with open bounds against an index of A,B,C,D: a range with both bounds null and
 * ranges starting at the empty string with no upper bound are expected to match all four
 * documents, while a range starting at "B" with no upper bound is expected to match three.
 */
public void testAllDocs() throws Exception {
  initializeIndex(new String[]{"A", "B", "C", "D"});
  // try-with-resources closes the reader even when a search or an assertion fails
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);

    TermRangeQuery query = new TermRangeQuery("content", null, null, true, true);
    assertEquals(4, searcher.search(query, 1000).scoreDocs.length);

    query = TermRangeQuery.newStringRange("content", "", null, true, true);
    assertEquals(4, searcher.search(query, 1000).scoreDocs.length);

    query = TermRangeQuery.newStringRange("content", "", null, true, false);
    assertEquals(4, searcher.search(query, 1000).scoreDocs.length);

    // and now another one
    query = TermRangeQuery.newStringRange("content", "B", null, true, true);
    assertEquals(3, searcher.search(query, 1000).scoreDocs.length);
  }
}
/**
 * Exercises the {@code TopTermsScoringBooleanQueryRewrite} rewrite method (applied inside
 * {@code checkBooleanTerms}) on a term range query. This test arguably belongs elsewhere.
 *
 * <p>With the default maximum clause count, the range B..J over an index of A..K must rewrite to
 * one clause per term B through J. With the maximum clause count lowered to 3, only the three
 * lowest terms of the range (B, C, D) are expected. The previous maximum clause count is always
 * restored.
 */
public void testTopTermsRewrite() throws Exception {
  initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"});
  // try-with-resources closes the reader even when a check fails
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    TermRangeQuery query = TermRangeQuery.newStringRange("content", "B", "J", true, true);
    checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J");

    // the max clause count is static state on BooleanQuery, so put it back afterwards
    final int savedClauseCount = BooleanQuery.getMaxClauseCount();
    try {
      BooleanQuery.setMaxClauseCount(3);
      checkBooleanTerms(searcher, query, "B", "C", "D");
    } finally {
      BooleanQuery.setMaxClauseCount(savedClauseCount);
    }
  }
}
/**
 * Rewrites {@code query} with a {@code TopTermsScoringBooleanQueryRewrite(50)} and asserts that
 * the result is a boolean query made of term queries whose terms are exactly {@code terms},
 * each appearing once.
 *
 * <p>Note: this sets the rewrite method on the passed-in {@code query}.
 *
 * @param searcher searcher used to rewrite the query
 * @param query the range query to rewrite
 * @param terms the term texts expected among the rewritten clauses
 * @throws IOException if rewriting the query fails
 */
private void checkBooleanTerms(IndexSearcher searcher, TermRangeQuery query, String... terms) throws IOException {
  query.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
  final BooleanQuery rewritten = (BooleanQuery) searcher.rewrite(query);

  final Set<String> remaining = asSet(terms);
  assertEquals(remaining.size(), rewritten.clauses().size());

  for (BooleanClause clause : rewritten.clauses()) {
    final Query inner = clause.getQuery();
    assertTrue(inner instanceof TermQuery);
    final String text = ((TermQuery) inner).getTerm().text();
    assertTrue("invalid term: "+ text, remaining.contains(text));
    // drop each term once seen, so a duplicate clause fails the contains() check above
    remaining.remove(text);
  }

  // every expected term must have been seen
  assertEquals(0, remaining.size());
}
/**
 * Checks {@code equals} and {@code hashCode} of queries built by
 * {@code TermRangeQuery.newStringRange}: equal arguments give equal queries with equal hash
 * codes, while a different field, lower term, upper term, open side or inclusiveness gives
 * unequal queries.
 */
public void testEqualsHashcode() {
  // fully bounded, inclusive range and an identical twin
  final Query base = TermRangeQuery.newStringRange("content", "A", "C", true, true);
  final Query twin = TermRangeQuery.newStringRange("content", "A", "C", true, true);
  assertEquals("query equals itself is true", base, base);
  assertEquals("equivalent queries are equal", base, twin);
  assertEquals("hashcode must return same value when equals is true", base.hashCode(), twin.hashCode());

  // one differing component at a time
  final Query otherField = TermRangeQuery.newStringRange("notcontent", "A", "C", true, true);
  assertFalse("Different fields are not equal", base.equals(otherField));
  final Query otherLower = TermRangeQuery.newStringRange("content", "X", "C", true, true);
  assertFalse("Different lower terms are not equal", base.equals(otherLower));
  final Query otherUpper = TermRangeQuery.newStringRange("content", "A", "Z", true, true);
  assertFalse("Different upper terms are not equal", base.equals(otherUpper));

  // open lower bound
  final Query noLowerA = TermRangeQuery.newStringRange("content", null, "C", true, true);
  final Query noLowerB = TermRangeQuery.newStringRange("content", null, "C", true, true);
  assertEquals("equivalent queries with null lowerterms are equal()", noLowerA, noLowerB);
  assertEquals("hashcode must return same value when equals is true", noLowerA.hashCode(), noLowerB.hashCode());

  // open upper bound
  final Query noUpperA = TermRangeQuery.newStringRange("content", "C", null, true, true);
  final Query noUpperB = TermRangeQuery.newStringRange("content", "C", null, true, true);
  assertEquals("equivalent queries with null upperterms are equal()", noUpperA, noUpperB);
  assertEquals("hashcode returns same value", noUpperA.hashCode(), noUpperB.hashCode());

  // open on opposite sides
  final Query openBelow = TermRangeQuery.newStringRange("content", null, "C", true, true);
  final Query openAbove = TermRangeQuery.newStringRange("content", "C", null, true, true);
  assertFalse("queries with different upper and lower terms are not equal", openBelow.equals(openAbove));

  // same bounds, different inclusiveness
  final Query exclusive = TermRangeQuery.newStringRange("content", "A", "C", false, false);
  final Query inclusive = TermRangeQuery.newStringRange("content", "A", "C", true, true);
  assertFalse("queries with different inclusive are not equal", exclusive.equals(inclusive));
}
/** Analyzer whose token stream consists solely of a {@link SingleCharTokenizer}. */
private static class SingleCharAnalyzer extends Analyzer {

  /**
   * Tokenizer that reports exactly one token per stream. The token's term is the first character
   * of the input when one could be read; otherwise the term is left as it is after
   * {@code clearAttributes()}.
   */
  private static class SingleCharTokenizer extends Tokenizer {
    private final char[] buffer = new char[1];
    // true once the single token of the current stream has been produced
    private boolean done = false;
    private final CharTermAttribute termAtt;

    public SingleCharTokenizer() {
      super();
      this.termAtt = addAttribute(CharTermAttribute.class);
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (done) {
        return false;
      }
      final int charsRead = input.read(buffer);
      clearAttributes();
      done = true;
      if (charsRead == 1) {
        termAtt.copyBuffer(buffer, 0, 1);
      }
      // a token is reported even when no character was read
      return true;
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      // allow the tokenizer to emit its single token again
      done = false;
    }
  }

  @Override
  public TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new SingleCharTokenizer();
    return new TokenStreamComponents(source);
  }
}
/**
 * Builds the index from {@code values} using a whitespace {@link MockAnalyzer}; delegates to the
 * two-argument overload.
 *
 * @param values content of the documents to index, one document per element
 * @throws IOException if writing the index fails
 */
private void initializeIndex(String[] values) throws IOException {
  final Analyzer whitespaceAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  initializeIndex(values, whitespaceAnalyzer);
}
/**
 * Builds the index in {@code dir} from {@code values}, opening the writer in
 * {@code OpenMode.CREATE} and adding one document per value.
 *
 * @param values content of the documents to index, one document per element
 * @param analyzer analyzer handed to the index writer configuration
 * @throws IOException if writing the index fails
 */
private void initializeIndex(String[] values, Analyzer analyzer) throws IOException {
  // try-with-resources closes the writer even when adding a document fails
  try (IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer).setOpenMode(OpenMode.CREATE))) {
    for (String value : values) {
      insertDoc(writer, value);
    }
  }
}
/**
 * Appends a single document with the given content to the existing index, using a writer opened
 * in {@code OpenMode.APPEND} with a whitespace {@link MockAnalyzer}.
 *
 * <p>TODO: should this really create a new analyzer for every document?
 *
 * @param content value of the document's "content" field
 * @throws IOException if writing the index fails
 */
private void addDoc(String content) throws IOException {
  // try-with-resources closes the writer even when adding the document fails
  try (IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND))) {
    insertDoc(writer, content);
  }
}
/**
 * Adds one document to {@code writer}: a stored "id" string field built from the running
 * document counter, and an unstored "content" text field.
 *
 * @param writer the writer that receives the document
 * @param content value of the document's "content" field
 * @throws IOException if the writer fails to add the document
 */
private void insertDoc(IndexWriter writer, String content) throws IOException {
  final Document document = new Document();
  final String id = "id" + docCount;
  document.add(newStringField("id", id, Field.Store.YES));
  document.add(newTextField("content", content, Field.Store.NO));
  writer.addDocument(document);
  // the counter advances only after the writer has accepted the document
  docCount += 1;
}
/**
 * Test for LUCENE-38 (http://issues.apache.org/jira/browse/LUCENE-38): a range with no lower
 * bound and an exclusive upper bound of "C", over documents indexed with
 * {@code SingleCharAnalyzer}, one of which has empty content.
 *
 * <p>The assertions encode the behaviour expected with LUCENE-38 fixed: the empty-content
 * document counts as in range alongside A and B.
 */
public void testExclusiveLowerNull() throws Exception {
  Analyzer analyzer = new SingleCharAnalyzer();
  Query query = TermRangeQuery.newStringRange("content", null, "C",
      false, false);

  initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
  // try-with-resources closes each reader even when the search or the assertion fails
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    long numHits = searcher.search(query, 1000).totalHits.value;
    assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 3, numHits);
  }

  // re-create the index without "C"
  initializeIndex(new String[] {"A", "B", "", "D"}, analyzer);
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    long numHits = searcher.search(query, 1000).totalHits.value;
    assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 3, numHits);
  }

  // append "C"; the upper bound is exclusive, so the hit count must not change
  addDoc("C");
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    long numHits = searcher.search(query, 1000).totalHits.value;
    assertEquals("C added, still A, B & <empty string> are in range", 3, numHits);
  }
}
/**
 * Test for LUCENE-38 (http://issues.apache.org/jira/browse/LUCENE-38): a range with no lower
 * bound and an inclusive upper bound of "C", over documents indexed with
 * {@code SingleCharAnalyzer}, one of which has empty content.
 *
 * <p>The assertions encode the behaviour expected with LUCENE-38 fixed: the empty-content
 * document counts as in range alongside A, B and (when present) C.
 */
public void testInclusiveLowerNull() throws Exception {
  Analyzer analyzer = new SingleCharAnalyzer();
  Query query = TermRangeQuery.newStringRange("content", null, "C", true, true);

  initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
  // try-with-resources closes each reader even when the search or the assertion fails
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    long numHits = searcher.search(query, 1000).totalHits.value;
    assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 4, numHits);
  }

  // re-create the index without "C"
  initializeIndex(new String[]{"A", "B", "", "D"}, analyzer);
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    long numHits = searcher.search(query, 1000).totalHits.value;
    assertEquals("A,B,<empty string>,D - A, B and <empty string> in range", 3, numHits);
  }

  // append "C"; the upper bound is inclusive, so it adds one hit
  addDoc("C");
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    long numHits = searcher.search(query, 1000).totalHits.value;
    assertEquals("C added => A,B,<empty string>,C in range", 4, numHits);
  }
}
}