| package org.apache.lucene.search; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.analysis.MockTokenizer; |
| import org.apache.lucene.analysis.SimpleAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.store.Directory; |
| import org.junit.AfterClass; |
| import org.junit.BeforeClass; |
| import org.junit.Test; |
| |
| import java.io.IOException; |
| import java.text.Collator; |
| import java.util.Locale; |
| |
| import junit.framework.Assert; |
| |
| public class TestMultiTermConstantScore extends BaseTestRangeFilter { |
| |
| /** threshold for comparing floats */ |
| public static final float SCORE_COMP_THRESH = 1e-6f; |
| |
| static Directory small; |
| static IndexReader reader; |
| |
| static public void assertEquals(String m, float e, float a) { |
| Assert.assertEquals(m, e, a, SCORE_COMP_THRESH); |
| } |
| |
| static public void assertEquals(String m, int e, int a) { |
| Assert.assertEquals(m, e, a); |
| } |
| |
| @BeforeClass |
| public static void beforeClass() throws Exception { |
| String[] data = new String[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, |
| "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", |
| "X 4 5 6" }; |
| |
| small = newDirectory(); |
| RandomIndexWriter writer = new RandomIndexWriter(random, small, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, |
| new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMergePolicy(newLogMergePolicy())); |
| |
| for (int i = 0; i < data.length; i++) { |
| Document doc = new Document(); |
| doc.add(newField("id", String.valueOf(i), Field.Store.YES, |
| Field.Index.NOT_ANALYZED));// Field.Keyword("id",String.valueOf(i))); |
| doc |
| .add(newField("all", "all", Field.Store.YES, |
| Field.Index.NOT_ANALYZED));// Field.Keyword("all","all")); |
| if (null != data[i]) { |
| doc.add(newField("data", data[i], Field.Store.YES, |
| Field.Index.ANALYZED));// Field.Text("data",data[i])); |
| } |
| writer.addDocument(doc); |
| } |
| |
| reader = writer.getReader(); |
| writer.close(); |
| } |
| |
| @AfterClass |
| public static void afterClass() throws Exception { |
| reader.close(); |
| small.close(); |
| reader = null; |
| small = null; |
| } |
| |
| /** macro for readability */ |
| public static Query csrq(String f, String l, String h, boolean il, boolean ih) { |
| TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih); |
| query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); |
| return query; |
| } |
| |
| public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) { |
| TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih); |
| query.setRewriteMethod(method); |
| return query; |
| } |
| |
| /** macro for readability */ |
| public static Query csrq(String f, String l, String h, boolean il, |
| boolean ih, Collator c) { |
| TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c); |
| query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); |
| return query; |
| } |
| |
| /** macro for readability */ |
| public static Query cspq(Term prefix) { |
| PrefixQuery query = new PrefixQuery(prefix); |
| query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); |
| return query; |
| } |
| |
| /** macro for readability */ |
| public static Query cswcq(Term wild) { |
| WildcardQuery query = new WildcardQuery(wild); |
| query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); |
| return query; |
| } |
| |
| @Test |
| public void testBasics() throws IOException { |
| QueryUtils.check(csrq("data", "1", "6", T, T)); |
| QueryUtils.check(csrq("data", "A", "Z", T, T)); |
| QueryUtils.checkUnequal(csrq("data", "1", "6", T, T), csrq("data", "A", |
| "Z", T, T)); |
| |
| QueryUtils.check(cspq(new Term("data", "p*u?"))); |
| QueryUtils.checkUnequal(cspq(new Term("data", "pre*")), cspq(new Term( |
| "data", "pres*"))); |
| |
| QueryUtils.check(cswcq(new Term("data", "p"))); |
| QueryUtils.checkUnequal(cswcq(new Term("data", "pre*n?t")), cswcq(new Term( |
| "data", "pr*t?j"))); |
| } |
| |
| @Test |
| public void testBasicsRngCollating() throws IOException { |
| Collator c = Collator.getInstance(Locale.ENGLISH); |
| QueryUtils.check(csrq("data", "1", "6", T, T, c)); |
| QueryUtils.check(csrq("data", "A", "Z", T, T, c)); |
| QueryUtils.checkUnequal(csrq("data", "1", "6", T, T, c), csrq("data", "A", |
| "Z", T, T, c)); |
| } |
| |
| @Test |
| public void testEqualScores() throws IOException { |
| // NOTE: uses index build in *this* setUp |
| |
| IndexSearcher search = newSearcher(reader); |
| |
| ScoreDoc[] result; |
| |
| // some hits match more terms then others, score should be the same |
| |
| result = search.search(csrq("data", "1", "6", T, T), null, 1000).scoreDocs; |
| int numHits = result.length; |
| assertEquals("wrong number of results", 6, numHits); |
| float score = result[0].score; |
| for (int i = 1; i < numHits; i++) { |
| assertEquals("score for " + i + " was not the same", score, |
| result[i].score); |
| } |
| |
| result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), null, 1000).scoreDocs; |
| numHits = result.length; |
| assertEquals("wrong number of results", 6, numHits); |
| for (int i = 0; i < numHits; i++) { |
| assertEquals("score for " + i + " was not the same", score, |
| result[i].score); |
| } |
| |
| search.close(); |
| } |
| |
| @Test |
| public void testBoost() throws IOException { |
| // NOTE: uses index build in *this* setUp |
| |
| IndexSearcher search = newSearcher(reader); |
| |
| // test for correct application of query normalization |
| // must use a non score normalizing method for this. |
| Query q = csrq("data", "1", "6", T, T); |
| q.setBoost(100); |
| search.search(q, null, new Collector() { |
| private int base = 0; |
| private Scorer scorer; |
| @Override |
| public void setScorer(Scorer scorer) throws IOException { |
| this.scorer = scorer; |
| } |
| @Override |
| public void collect(int doc) throws IOException { |
| assertEquals("score for doc " + (doc + base) + " was not correct", 1.0f, scorer.score()); |
| } |
| @Override |
| public void setNextReader(IndexReader reader, int docBase) { |
| base = docBase; |
| } |
| @Override |
| public boolean acceptsDocsOutOfOrder() { |
| return true; |
| } |
| }); |
| |
| // |
| // Ensure that boosting works to score one clause of a query higher |
| // than another. |
| // |
| Query q1 = csrq("data", "A", "A", T, T); // matches document #0 |
| q1.setBoost(.1f); |
| Query q2 = csrq("data", "Z", "Z", T, T); // matches document #1 |
| BooleanQuery bq = new BooleanQuery(true); |
| bq.add(q1, BooleanClause.Occur.SHOULD); |
| bq.add(q2, BooleanClause.Occur.SHOULD); |
| |
| ScoreDoc[] hits = search.search(bq, null, 1000).scoreDocs; |
| Assert.assertEquals(1, hits[0].doc); |
| Assert.assertEquals(0, hits[1].doc); |
| assertTrue(hits[0].score > hits[1].score); |
| |
| q1 = csrq("data", "A", "A", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #0 |
| q1.setBoost(.1f); |
| q2 = csrq("data", "Z", "Z", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #1 |
| bq = new BooleanQuery(true); |
| bq.add(q1, BooleanClause.Occur.SHOULD); |
| bq.add(q2, BooleanClause.Occur.SHOULD); |
| |
| hits = search.search(bq, null, 1000).scoreDocs; |
| Assert.assertEquals(1, hits[0].doc); |
| Assert.assertEquals(0, hits[1].doc); |
| assertTrue(hits[0].score > hits[1].score); |
| |
| q1 = csrq("data", "A", "A", T, T); // matches document #0 |
| q1.setBoost(10f); |
| q2 = csrq("data", "Z", "Z", T, T); // matches document #1 |
| bq = new BooleanQuery(true); |
| bq.add(q1, BooleanClause.Occur.SHOULD); |
| bq.add(q2, BooleanClause.Occur.SHOULD); |
| |
| hits = search.search(bq, null, 1000).scoreDocs; |
| Assert.assertEquals(0, hits[0].doc); |
| Assert.assertEquals(1, hits[1].doc); |
| assertTrue(hits[0].score > hits[1].score); |
| search.close(); |
| } |
| |
| @Test |
| public void testBooleanOrderUnAffected() throws IOException { |
| // NOTE: uses index build in *this* setUp |
| |
| IndexSearcher search = newSearcher(reader); |
| |
| // first do a regular TermRangeQuery which uses term expansion so |
| // docs with more terms in range get higher scores |
| |
| Query rq = new TermRangeQuery("data", "1", "4", T, T); |
| |
| ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs; |
| int numHits = expected.length; |
| |
| // now do a boolean where which also contains a |
| // ConstantScoreRangeQuery and make sure hte order is the same |
| |
| BooleanQuery q = new BooleanQuery(); |
| q.add(rq, BooleanClause.Occur.MUST);// T, F); |
| q.add(csrq("data", "1", "6", T, T), BooleanClause.Occur.MUST);// T, F); |
| |
| ScoreDoc[] actual = search.search(q, null, 1000).scoreDocs; |
| |
| assertEquals("wrong numebr of hits", numHits, actual.length); |
| for (int i = 0; i < numHits; i++) { |
| assertEquals("mismatch in docid for hit#" + i, expected[i].doc, |
| actual[i].doc); |
| } |
| |
| search.close(); |
| } |
| |
| @Test |
| public void testRangeQueryId() throws IOException { |
| // NOTE: uses index build in *super* setUp |
| |
| IndexReader reader = signedIndexReader; |
| IndexSearcher search = newSearcher(reader); |
| |
| int medId = ((maxId - minId) / 2); |
| |
| String minIP = pad(minId); |
| String maxIP = pad(maxId); |
| String medIP = pad(medId); |
| |
| int numDocs = reader.numDocs(); |
| |
| assertEquals("num of docs", numDocs, 1 + maxId - minId); |
| |
| ScoreDoc[] result; |
| |
| // test id, bounded on both ends |
| |
| result = search.search(csrq("id", minIP, maxIP, T, T), null, numDocs).scoreDocs; |
| assertEquals("find all", numDocs, result.length); |
| |
| result = search.search(csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("find all", numDocs, result.length); |
| |
| result = search.search(csrq("id", minIP, maxIP, T, F), null, numDocs).scoreDocs; |
| assertEquals("all but last", numDocs - 1, result.length); |
| |
| result = search.search(csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("all but last", numDocs - 1, result.length); |
| |
| result = search.search(csrq("id", minIP, maxIP, F, T), null, numDocs).scoreDocs; |
| assertEquals("all but first", numDocs - 1, result.length); |
| |
| result = search.search(csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("all but first", numDocs - 1, result.length); |
| |
| result = search.search(csrq("id", minIP, maxIP, F, F), null, numDocs).scoreDocs; |
| assertEquals("all but ends", numDocs - 2, result.length); |
| |
| result = search.search(csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("all but ends", numDocs - 2, result.length); |
| |
| result = search.search(csrq("id", medIP, maxIP, T, T), null, numDocs).scoreDocs; |
| assertEquals("med and up", 1 + maxId - medId, result.length); |
| |
| result = search.search(csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("med and up", 1 + maxId - medId, result.length); |
| |
| result = search.search(csrq("id", minIP, medIP, T, T), null, numDocs).scoreDocs; |
| assertEquals("up to med", 1 + medId - minId, result.length); |
| |
| result = search.search(csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("up to med", 1 + medId - minId, result.length); |
| |
| // unbounded id |
| |
| result = search.search(csrq("id", minIP, null, T, F), null, numDocs).scoreDocs; |
| assertEquals("min and up", numDocs, result.length); |
| |
| result = search.search(csrq("id", null, maxIP, F, T), null, numDocs).scoreDocs; |
| assertEquals("max and down", numDocs, result.length); |
| |
| result = search.search(csrq("id", minIP, null, F, F), null, numDocs).scoreDocs; |
| assertEquals("not min, but up", numDocs - 1, result.length); |
| |
| result = search.search(csrq("id", null, maxIP, F, F), null, numDocs).scoreDocs; |
| assertEquals("not max, but down", numDocs - 1, result.length); |
| |
| result = search.search(csrq("id", medIP, maxIP, T, F), null, numDocs).scoreDocs; |
| assertEquals("med and up, not max", maxId - medId, result.length); |
| |
| result = search.search(csrq("id", minIP, medIP, F, T), null, numDocs).scoreDocs; |
| assertEquals("not min, up to med", medId - minId, result.length); |
| |
| // very small sets |
| |
| result = search.search(csrq("id", minIP, minIP, F, F), null, numDocs).scoreDocs; |
| assertEquals("min,min,F,F", 0, result.length); |
| |
| result = search.search(csrq("id", minIP, minIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("min,min,F,F", 0, result.length); |
| |
| result = search.search(csrq("id", medIP, medIP, F, F), null, numDocs).scoreDocs; |
| assertEquals("med,med,F,F", 0, result.length); |
| |
| result = search.search(csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("med,med,F,F", 0, result.length); |
| |
| result = search.search(csrq("id", maxIP, maxIP, F, F), null, numDocs).scoreDocs; |
| assertEquals("max,max,F,F", 0, result.length); |
| |
| result = search.search(csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("max,max,F,F", 0, result.length); |
| |
| result = search.search(csrq("id", minIP, minIP, T, T), null, numDocs).scoreDocs; |
| assertEquals("min,min,T,T", 1, result.length); |
| |
| result = search.search(csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("min,min,T,T", 1, result.length); |
| |
| result = search.search(csrq("id", null, minIP, F, T), null, numDocs).scoreDocs; |
| assertEquals("nul,min,F,T", 1, result.length); |
| |
| result = search.search(csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("nul,min,F,T", 1, result.length); |
| |
| result = search.search(csrq("id", maxIP, maxIP, T, T), null, numDocs).scoreDocs; |
| assertEquals("max,max,T,T", 1, result.length); |
| |
| result = search.search(csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("max,max,T,T", 1, result.length); |
| |
| result = search.search(csrq("id", maxIP, null, T, F), null, numDocs).scoreDocs; |
| assertEquals("max,nul,T,T", 1, result.length); |
| |
| result = search.search(csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("max,nul,T,T", 1, result.length); |
| |
| result = search.search(csrq("id", medIP, medIP, T, T), null, numDocs).scoreDocs; |
| assertEquals("med,med,T,T", 1, result.length); |
| |
| result = search.search(csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; |
| assertEquals("med,med,T,T", 1, result.length); |
| |
| search.close(); |
| } |
| |
| @Test |
| public void testRangeQueryIdCollating() throws IOException { |
| // NOTE: uses index build in *super* setUp |
| |
| IndexReader reader = signedIndexReader; |
| IndexSearcher search = newSearcher(reader); |
| |
| int medId = ((maxId - minId) / 2); |
| |
| String minIP = pad(minId); |
| String maxIP = pad(maxId); |
| String medIP = pad(medId); |
| |
| int numDocs = reader.numDocs(); |
| |
| assertEquals("num of docs", numDocs, 1 + maxId - minId); |
| |
| ScoreDoc[] result; |
| |
| Collator c = Collator.getInstance(Locale.ENGLISH); |
| |
| // test id, bounded on both ends |
| |
| result = search.search(csrq("id", minIP, maxIP, T, T, c), null, numDocs).scoreDocs; |
| assertEquals("find all", numDocs, result.length); |
| |
| result = search.search(csrq("id", minIP, maxIP, T, F, c), null, numDocs).scoreDocs; |
| assertEquals("all but last", numDocs - 1, result.length); |
| |
| result = search.search(csrq("id", minIP, maxIP, F, T, c), null, numDocs).scoreDocs; |
| assertEquals("all but first", numDocs - 1, result.length); |
| |
| result = search.search(csrq("id", minIP, maxIP, F, F, c), null, numDocs).scoreDocs; |
| assertEquals("all but ends", numDocs - 2, result.length); |
| |
| result = search.search(csrq("id", medIP, maxIP, T, T, c), null, numDocs).scoreDocs; |
| assertEquals("med and up", 1 + maxId - medId, result.length); |
| |
| result = search.search(csrq("id", minIP, medIP, T, T, c), null, numDocs).scoreDocs; |
| assertEquals("up to med", 1 + medId - minId, result.length); |
| |
| // unbounded id |
| |
| result = search.search(csrq("id", minIP, null, T, F, c), null, numDocs).scoreDocs; |
| assertEquals("min and up", numDocs, result.length); |
| |
| result = search.search(csrq("id", null, maxIP, F, T, c), null, numDocs).scoreDocs; |
| assertEquals("max and down", numDocs, result.length); |
| |
| result = search.search(csrq("id", minIP, null, F, F, c), null, numDocs).scoreDocs; |
| assertEquals("not min, but up", numDocs - 1, result.length); |
| |
| result = search.search(csrq("id", null, maxIP, F, F, c), null, numDocs).scoreDocs; |
| assertEquals("not max, but down", numDocs - 1, result.length); |
| |
| result = search.search(csrq("id", medIP, maxIP, T, F, c), null, numDocs).scoreDocs; |
| assertEquals("med and up, not max", maxId - medId, result.length); |
| |
| result = search.search(csrq("id", minIP, medIP, F, T, c), null, numDocs).scoreDocs; |
| assertEquals("not min, up to med", medId - minId, result.length); |
| |
| // very small sets |
| |
| result = search.search(csrq("id", minIP, minIP, F, F, c), null, numDocs).scoreDocs; |
| assertEquals("min,min,F,F,c", 0, result.length); |
| result = search.search(csrq("id", medIP, medIP, F, F, c), null, numDocs).scoreDocs; |
| assertEquals("med,med,F,F,c", 0, result.length); |
| result = search.search(csrq("id", maxIP, maxIP, F, F, c), null, numDocs).scoreDocs; |
| assertEquals("max,max,F,F,c", 0, result.length); |
| |
| result = search.search(csrq("id", minIP, minIP, T, T, c), null, numDocs).scoreDocs; |
| assertEquals("min,min,T,T,c", 1, result.length); |
| result = search.search(csrq("id", null, minIP, F, T, c), null, numDocs).scoreDocs; |
| assertEquals("nul,min,F,T,c", 1, result.length); |
| |
| result = search.search(csrq("id", maxIP, maxIP, T, T, c), null, numDocs).scoreDocs; |
| assertEquals("max,max,T,T,c", 1, result.length); |
| result = search.search(csrq("id", maxIP, null, T, F, c), null, numDocs).scoreDocs; |
| assertEquals("max,nul,T,T,c", 1, result.length); |
| |
| result = search.search(csrq("id", medIP, medIP, T, T, c), null, numDocs).scoreDocs; |
| assertEquals("med,med,T,T,c", 1, result.length); |
| |
| search.close(); |
| } |
| |
| @Test |
| public void testRangeQueryRand() throws IOException { |
| // NOTE: uses index build in *super* setUp |
| |
| IndexReader reader = signedIndexReader; |
| IndexSearcher search = newSearcher(reader); |
| |
| String minRP = pad(signedIndexDir.minR); |
| String maxRP = pad(signedIndexDir.maxR); |
| |
| int numDocs = reader.numDocs(); |
| |
| assertEquals("num of docs", numDocs, 1 + maxId - minId); |
| |
| ScoreDoc[] result; |
| |
| // test extremes, bounded on both ends |
| |
| result = search.search(csrq("rand", minRP, maxRP, T, T), null, numDocs).scoreDocs; |
| assertEquals("find all", numDocs, result.length); |
| |
| result = search.search(csrq("rand", minRP, maxRP, T, F), null, numDocs).scoreDocs; |
| assertEquals("all but biggest", numDocs - 1, result.length); |
| |
| result = search.search(csrq("rand", minRP, maxRP, F, T), null, numDocs).scoreDocs; |
| assertEquals("all but smallest", numDocs - 1, result.length); |
| |
| result = search.search(csrq("rand", minRP, maxRP, F, F), null, numDocs).scoreDocs; |
| assertEquals("all but extremes", numDocs - 2, result.length); |
| |
| // unbounded |
| |
| result = search.search(csrq("rand", minRP, null, T, F), null, numDocs).scoreDocs; |
| assertEquals("smallest and up", numDocs, result.length); |
| |
| result = search.search(csrq("rand", null, maxRP, F, T), null, numDocs).scoreDocs; |
| assertEquals("biggest and down", numDocs, result.length); |
| |
| result = search.search(csrq("rand", minRP, null, F, F), null, numDocs).scoreDocs; |
| assertEquals("not smallest, but up", numDocs - 1, result.length); |
| |
| result = search.search(csrq("rand", null, maxRP, F, F), null, numDocs).scoreDocs; |
| assertEquals("not biggest, but down", numDocs - 1, result.length); |
| |
| // very small sets |
| |
| result = search.search(csrq("rand", minRP, minRP, F, F), null, numDocs).scoreDocs; |
| assertEquals("min,min,F,F", 0, result.length); |
| result = search.search(csrq("rand", maxRP, maxRP, F, F), null, numDocs).scoreDocs; |
| assertEquals("max,max,F,F", 0, result.length); |
| |
| result = search.search(csrq("rand", minRP, minRP, T, T), null, numDocs).scoreDocs; |
| assertEquals("min,min,T,T", 1, result.length); |
| result = search.search(csrq("rand", null, minRP, F, T), null, numDocs).scoreDocs; |
| assertEquals("nul,min,F,T", 1, result.length); |
| |
| result = search.search(csrq("rand", maxRP, maxRP, T, T), null, numDocs).scoreDocs; |
| assertEquals("max,max,T,T", 1, result.length); |
| result = search.search(csrq("rand", maxRP, null, T, F), null, numDocs).scoreDocs; |
| assertEquals("max,nul,T,T", 1, result.length); |
| |
| search.close(); |
| } |
| |
| @Test |
| public void testRangeQueryRandCollating() throws IOException { |
| // NOTE: uses index build in *super* setUp |
| |
| // using the unsigned index because collation seems to ignore hyphens |
| IndexReader reader = unsignedIndexReader; |
| IndexSearcher search = newSearcher(reader); |
| |
| String minRP = pad(unsignedIndexDir.minR); |
| String maxRP = pad(unsignedIndexDir.maxR); |
| |
| int numDocs = reader.numDocs(); |
| |
| assertEquals("num of docs", numDocs, 1 + maxId - minId); |
| |
| ScoreDoc[] result; |
| |
| Collator c = Collator.getInstance(Locale.ENGLISH); |
| |
| // test extremes, bounded on both ends |
| |
| result = search.search(csrq("rand", minRP, maxRP, T, T, c), null, numDocs).scoreDocs; |
| assertEquals("find all", numDocs, result.length); |
| |
| result = search.search(csrq("rand", minRP, maxRP, T, F, c), null, numDocs).scoreDocs; |
| assertEquals("all but biggest", numDocs - 1, result.length); |
| |
| result = search.search(csrq("rand", minRP, maxRP, F, T, c), null, numDocs).scoreDocs; |
| assertEquals("all but smallest", numDocs - 1, result.length); |
| |
| result = search.search(csrq("rand", minRP, maxRP, F, F, c), null, numDocs).scoreDocs; |
| assertEquals("all but extremes", numDocs - 2, result.length); |
| |
| // unbounded |
| |
| result = search.search(csrq("rand", minRP, null, T, F, c), null, numDocs).scoreDocs; |
| assertEquals("smallest and up", numDocs, result.length); |
| |
| result = search.search(csrq("rand", null, maxRP, F, T, c), null, numDocs).scoreDocs; |
| assertEquals("biggest and down", numDocs, result.length); |
| |
| result = search.search(csrq("rand", minRP, null, F, F, c), null, numDocs).scoreDocs; |
| assertEquals("not smallest, but up", numDocs - 1, result.length); |
| |
| result = search.search(csrq("rand", null, maxRP, F, F, c), null, numDocs).scoreDocs; |
| assertEquals("not biggest, but down", numDocs - 1, result.length); |
| |
| // very small sets |
| |
| result = search.search(csrq("rand", minRP, minRP, F, F, c), null, numDocs).scoreDocs; |
| assertEquals("min,min,F,F,c", 0, result.length); |
| result = search.search(csrq("rand", maxRP, maxRP, F, F, c), null, numDocs).scoreDocs; |
| assertEquals("max,max,F,F,c", 0, result.length); |
| |
| result = search.search(csrq("rand", minRP, minRP, T, T, c), null, numDocs).scoreDocs; |
| assertEquals("min,min,T,T,c", 1, result.length); |
| result = search.search(csrq("rand", null, minRP, F, T, c), null, numDocs).scoreDocs; |
| assertEquals("nul,min,F,T,c", 1, result.length); |
| |
| result = search.search(csrq("rand", maxRP, maxRP, T, T, c), null, numDocs).scoreDocs; |
| assertEquals("max,max,T,T,c", 1, result.length); |
| result = search.search(csrq("rand", maxRP, null, T, F, c), null, numDocs).scoreDocs; |
| assertEquals("max,nul,T,T,c", 1, result.length); |
| |
| search.close(); |
| } |
| |
| @Test |
| public void testFarsi() throws Exception { |
| |
| /* build an index */ |
| Directory farsiIndex = newDirectory(); |
| RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex, new SimpleAnalyzer(TEST_VERSION_CURRENT)); |
| Document doc = new Document(); |
| doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES, |
| Field.Index.NOT_ANALYZED)); |
| doc |
| .add(newField("body", "body", Field.Store.YES, |
| Field.Index.NOT_ANALYZED)); |
| writer.addDocument(doc); |
| |
| IndexReader reader = writer.getReader(); |
| writer.close(); |
| |
| IndexSearcher search = newSearcher(reader); |
| |
| // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in |
| // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi |
| // characters properly. |
| Collator c = Collator.getInstance(new Locale("ar")); |
| |
| // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi |
| // orders the U+0698 character before the U+0633 character, so the single |
| // index Term below should NOT be returned by a ConstantScoreRangeQuery |
| // with a Farsi Collator (or an Arabic one for the case when Farsi is |
| // not supported). |
| ScoreDoc[] result = search.search(csrq("content", "\u062F", "\u0698", T, T, |
| c), null, 1000).scoreDocs; |
| assertEquals("The index Term should not be included.", 0, result.length); |
| |
| result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, |
| 1000).scoreDocs; |
| assertEquals("The index Term should be included.", 1, result.length); |
| search.close(); |
| reader.close(); |
| farsiIndex.close(); |
| } |
| |
| @Test |
| public void testDanish() throws Exception { |
| |
| /* build an index */ |
| Directory danishIndex = newDirectory(); |
| RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex, new SimpleAnalyzer(TEST_VERSION_CURRENT)); |
| |
| // Danish collation orders the words below in the given order |
| // (example taken from TestSort.testInternationalSort() ). |
| String[] words = { "H\u00D8T", "H\u00C5T", "MAND" }; |
| for (int docnum = 0 ; docnum < words.length ; ++docnum) { |
| Document doc = new Document(); |
| doc.add(newField("content", words[docnum], |
| Field.Store.YES, Field.Index.NOT_ANALYZED)); |
| doc.add(newField("body", "body", |
| Field.Store.YES, Field.Index.NOT_ANALYZED)); |
| writer.addDocument(doc); |
| } |
| IndexReader reader = writer.getReader(); |
| writer.close(); |
| |
| IndexSearcher search = newSearcher(reader); |
| |
| Collator c = Collator.getInstance(new Locale("da", "dk")); |
| |
| // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], |
| // but Danish collation does. |
| ScoreDoc[] result = search.search |
| (csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs; |
| assertEquals("The index Term should be included.", 1, result.length); |
| |
| result = search.search |
| (csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs; |
| assertEquals("The index Term should not be included.", 0, result.length); |
| search.close(); |
| reader.close(); |
| danishIndex.close(); |
| } |
| } |