// blob: 9ee6b03b191c84f4298765c916966484c4468dbc [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using NUnit.Framework;
using SimpleAnalyzer = Lucene.Net.Analysis.SimpleAnalyzer;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using IndexReader = Lucene.Net.Index.IndexReader;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using Term = Lucene.Net.Index.Term;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
namespace Lucene.Net.Search
{
/// <summary> A basic 'positive' unit test class for the TermRangeFilter class.
///
/// <p/>
/// NOTE: at the moment, this class only tests for 'positive' results;
/// it does not verify the results to ensure there are no 'false positives',
/// nor does it adequately test 'negative' results. It also does not test
/// that invalid input ("garbage in") results in an exception.
/// </summary>
[TestFixture]
public class TestTermRangeFilter:BaseTestRangeFilter
{
/// <summary> Creates the fixture, handing <paramref name="name"/> to the
/// BaseTestRangeFilter constructor.
/// </summary>
/// <param name="name">Name forwarded unchanged to the base class.</param>
public TestTermRangeFilter(string name)
    : base(name)
{
}
/// <summary> Creates the fixture using the parameterless BaseTestRangeFilter
/// constructor (invoked implicitly).
/// </summary>
public TestTermRangeFilter()
{
}
/// <summary> Applies TermRangeFilter to the "id" field of signedIndex and checks
/// the number of hits for ranges bounded on both ends, ranges with a null
/// (open) bound, and ranges that cover at most one term.
/// </summary>
[Test]
public virtual void TestRangeFilterId()
{
    IndexReader reader = IndexReader.Open(signedIndex.index, true);
    IndexSearcher search = new IndexSearcher(reader);
    try
    {
        // Midpoint of [minId, maxId]. Adding minId keeps the value inside the
        // id range when minId is not zero; it is unchanged when minId == 0.
        int medId = minId + ((maxId - minId) / 2);

        string minIP = Pad(minId);
        string maxIP = Pad(maxId);
        string medIP = Pad(medId);

        int numDocs = reader.NumDocs();

        // NUnit's AreEqual takes the expected value first.
        Assert.AreEqual(1 + maxId - minId, numDocs, "num of docs");

        ScoreDoc[] result;

        // Base query; the "find all" assertion below expects it to match every document.
        Query q = new TermQuery(new Term("body", "body"));

        // test id, bounded on both ends

        result = search.Search(q, new TermRangeFilter("id", minIP, maxIP, T, T), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs, result.Length, "find all");

        result = search.Search(q, new TermRangeFilter("id", minIP, maxIP, T, F), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs - 1, result.Length, "all but last");

        result = search.Search(q, new TermRangeFilter("id", minIP, maxIP, F, T), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs - 1, result.Length, "all but first");

        result = search.Search(q, new TermRangeFilter("id", minIP, maxIP, F, F), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs - 2, result.Length, "all but ends");

        result = search.Search(q, new TermRangeFilter("id", medIP, maxIP, T, T), numDocs).ScoreDocs;
        Assert.AreEqual(1 + maxId - medId, result.Length, "med and up");

        result = search.Search(q, new TermRangeFilter("id", minIP, medIP, T, T), numDocs).ScoreDocs;
        Assert.AreEqual(1 + medId - minId, result.Length, "up to med");

        // unbounded id (a null bound is always paired with an exclusive flag here)

        result = search.Search(q, new TermRangeFilter("id", minIP, null, T, F), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs, result.Length, "min and up");

        result = search.Search(q, new TermRangeFilter("id", null, maxIP, F, T), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs, result.Length, "max and down");

        result = search.Search(q, new TermRangeFilter("id", minIP, null, F, F), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs - 1, result.Length, "not min, but up");

        result = search.Search(q, new TermRangeFilter("id", null, maxIP, F, F), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs - 1, result.Length, "not max, but down");

        result = search.Search(q, new TermRangeFilter("id", medIP, maxIP, T, F), numDocs).ScoreDocs;
        Assert.AreEqual(maxId - medId, result.Length, "med and up, not max");

        result = search.Search(q, new TermRangeFilter("id", minIP, medIP, F, T), numDocs).ScoreDocs;
        Assert.AreEqual(medId - minId, result.Length, "not min, up to med");

        // very small sets

        result = search.Search(q, new TermRangeFilter("id", minIP, minIP, F, F), numDocs).ScoreDocs;
        Assert.AreEqual(0, result.Length, "min,min,F,F");

        result = search.Search(q, new TermRangeFilter("id", medIP, medIP, F, F), numDocs).ScoreDocs;
        Assert.AreEqual(0, result.Length, "med,med,F,F");

        result = search.Search(q, new TermRangeFilter("id", maxIP, maxIP, F, F), numDocs).ScoreDocs;
        Assert.AreEqual(0, result.Length, "max,max,F,F");

        result = search.Search(q, new TermRangeFilter("id", minIP, minIP, T, T), numDocs).ScoreDocs;
        Assert.AreEqual(1, result.Length, "min,min,T,T");

        result = search.Search(q, new TermRangeFilter("id", null, minIP, F, T), numDocs).ScoreDocs;
        Assert.AreEqual(1, result.Length, "nul,min,F,T");

        result = search.Search(q, new TermRangeFilter("id", maxIP, maxIP, T, T), numDocs).ScoreDocs;
        Assert.AreEqual(1, result.Length, "max,max,T,T");

        // Label corrected to match the flags actually passed (T, F).
        result = search.Search(q, new TermRangeFilter("id", maxIP, null, T, F), numDocs).ScoreDocs;
        Assert.AreEqual(1, result.Length, "max,nul,T,F");

        result = search.Search(q, new TermRangeFilter("id", medIP, medIP, T, T), numDocs).ScoreDocs;
        Assert.AreEqual(1, result.Length, "med,med,T,T");
    }
    finally
    {
        // Release both handles even when an assertion fails. The searcher was
        // built on an existing reader, so the reader is closed explicitly too.
        search.Close();
        reader.Close();
    }
}
/// <summary> Same "id" range checks as the non-collating test, but every
/// TermRangeFilter is given the CompareInfo of the "en" culture and only the
/// total hit count is inspected.
/// </summary>
[Test]
public virtual void TestRangeFilterIdCollating()
{
    IndexReader reader = IndexReader.Open(signedIndex.index, true);
    IndexSearcher search = new IndexSearcher(reader);
    try
    {
        System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("en").CompareInfo;

        // Midpoint of [minId, maxId]. Adding minId keeps the value inside the
        // id range when minId is not zero; it is unchanged when minId == 0.
        int medId = minId + ((maxId - minId) / 2);

        string minIP = Pad(minId);
        string maxIP = Pad(maxId);
        string medIP = Pad(medId);

        int numDocs = reader.NumDocs();

        // NUnit's AreEqual takes the expected value first.
        Assert.AreEqual(1 + maxId - minId, numDocs, "num of docs");

        // Base query; the "find all" assertion below expects it to match every document.
        Query q = new TermQuery(new Term("body", "body"));

        // test id, bounded on both ends

        int numHits = search.Search(q, new TermRangeFilter("id", minIP, maxIP, T, T, c), 1000).TotalHits;
        Assert.AreEqual(numDocs, numHits, "find all");

        numHits = search.Search(q, new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).TotalHits;
        Assert.AreEqual(numDocs - 1, numHits, "all but last");

        numHits = search.Search(q, new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).TotalHits;
        Assert.AreEqual(numDocs - 1, numHits, "all but first");

        numHits = search.Search(q, new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).TotalHits;
        Assert.AreEqual(numDocs - 2, numHits, "all but ends");

        numHits = search.Search(q, new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).TotalHits;
        Assert.AreEqual(1 + maxId - medId, numHits, "med and up");

        numHits = search.Search(q, new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).TotalHits;
        Assert.AreEqual(1 + medId - minId, numHits, "up to med");

        // unbounded id (a null bound is always paired with an exclusive flag here)

        numHits = search.Search(q, new TermRangeFilter("id", minIP, null, T, F, c), 1000).TotalHits;
        Assert.AreEqual(numDocs, numHits, "min and up");

        numHits = search.Search(q, new TermRangeFilter("id", null, maxIP, F, T, c), 1000).TotalHits;
        Assert.AreEqual(numDocs, numHits, "max and down");

        numHits = search.Search(q, new TermRangeFilter("id", minIP, null, F, F, c), 1000).TotalHits;
        Assert.AreEqual(numDocs - 1, numHits, "not min, but up");

        numHits = search.Search(q, new TermRangeFilter("id", null, maxIP, F, F, c), 1000).TotalHits;
        Assert.AreEqual(numDocs - 1, numHits, "not max, but down");

        numHits = search.Search(q, new TermRangeFilter("id", medIP, maxIP, T, F, c), 1000).TotalHits;
        Assert.AreEqual(maxId - medId, numHits, "med and up, not max");

        numHits = search.Search(q, new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).TotalHits;
        Assert.AreEqual(medId - minId, numHits, "not min, up to med");

        // very small sets

        numHits = search.Search(q, new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).TotalHits;
        Assert.AreEqual(0, numHits, "min,min,F,F");

        numHits = search.Search(q, new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).TotalHits;
        Assert.AreEqual(0, numHits, "med,med,F,F");

        numHits = search.Search(q, new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).TotalHits;
        Assert.AreEqual(0, numHits, "max,max,F,F");

        numHits = search.Search(q, new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).TotalHits;
        Assert.AreEqual(1, numHits, "min,min,T,T");

        numHits = search.Search(q, new TermRangeFilter("id", null, minIP, F, T, c), 1000).TotalHits;
        Assert.AreEqual(1, numHits, "nul,min,F,T");

        numHits = search.Search(q, new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).TotalHits;
        Assert.AreEqual(1, numHits, "max,max,T,T");

        // Label corrected to match the flags actually passed (T, F).
        numHits = search.Search(q, new TermRangeFilter("id", maxIP, null, T, F, c), 1000).TotalHits;
        Assert.AreEqual(1, numHits, "max,nul,T,F");

        numHits = search.Search(q, new TermRangeFilter("id", medIP, medIP, T, T, c), 1000).TotalHits;
        Assert.AreEqual(1, numHits, "med,med,T,T");
    }
    finally
    {
        // Release both handles even when an assertion fails. The searcher was
        // built on an existing reader, so the reader is closed explicitly too.
        search.Close();
        reader.Close();
    }
}
/// <summary> Applies TermRangeFilter to the "rand" field of signedIndex, using
/// the padded signedIndex.minR / signedIndex.maxR values as the extremes, and
/// checks the number of hits for bounded, open-ended and single-term ranges.
/// </summary>
[Test]
public virtual void TestRangeFilterRand()
{
    IndexReader reader = IndexReader.Open(signedIndex.index, true);
    IndexSearcher search = new IndexSearcher(reader);
    try
    {
        string minRP = Pad(signedIndex.minR);
        string maxRP = Pad(signedIndex.maxR);

        int numDocs = reader.NumDocs();

        // NUnit's AreEqual takes the expected value first.
        Assert.AreEqual(1 + maxId - minId, numDocs, "num of docs");

        ScoreDoc[] result;

        // Base query; the "find all" assertion below expects it to match every document.
        Query q = new TermQuery(new Term("body", "body"));

        // test extremes, bounded on both ends

        result = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, T, T), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs, result.Length, "find all");

        result = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, T, F), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs - 1, result.Length, "all but biggest");

        result = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, F, T), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs - 1, result.Length, "all but smallest");

        result = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, F, F), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs - 2, result.Length, "all but extremes");

        // unbounded (a null bound is always paired with an exclusive flag here)

        result = search.Search(q, new TermRangeFilter("rand", minRP, null, T, F), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs, result.Length, "smallest and up");

        result = search.Search(q, new TermRangeFilter("rand", null, maxRP, F, T), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs, result.Length, "biggest and down");

        result = search.Search(q, new TermRangeFilter("rand", minRP, null, F, F), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs - 1, result.Length, "not smallest, but up");

        result = search.Search(q, new TermRangeFilter("rand", null, maxRP, F, F), numDocs).ScoreDocs;
        Assert.AreEqual(numDocs - 1, result.Length, "not biggest, but down");

        // very small sets

        result = search.Search(q, new TermRangeFilter("rand", minRP, minRP, F, F), numDocs).ScoreDocs;
        Assert.AreEqual(0, result.Length, "min,min,F,F");

        result = search.Search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F), numDocs).ScoreDocs;
        Assert.AreEqual(0, result.Length, "max,max,F,F");

        result = search.Search(q, new TermRangeFilter("rand", minRP, minRP, T, T), numDocs).ScoreDocs;
        Assert.AreEqual(1, result.Length, "min,min,T,T");

        result = search.Search(q, new TermRangeFilter("rand", null, minRP, F, T), numDocs).ScoreDocs;
        Assert.AreEqual(1, result.Length, "nul,min,F,T");

        result = search.Search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T), numDocs).ScoreDocs;
        Assert.AreEqual(1, result.Length, "max,max,T,T");

        // Label corrected to match the flags actually passed (T, F).
        result = search.Search(q, new TermRangeFilter("rand", maxRP, null, T, F), numDocs).ScoreDocs;
        Assert.AreEqual(1, result.Length, "max,nul,T,F");
    }
    finally
    {
        // Release both handles even when an assertion fails. The searcher was
        // built on an existing reader, so the reader is closed explicitly too.
        search.Close();
        reader.Close();
    }
}
/// <summary> Applies TermRangeFilter with the "en" culture's CompareInfo to the
/// "rand" field of unsignedIndex and checks the total hit count for bounded,
/// open-ended and single-term ranges.
/// </summary>
[Test]
public virtual void TestRangeFilterRandCollating()
{
    // using the unsigned index because collation seems to ignore hyphens
    IndexReader reader = IndexReader.Open(unsignedIndex.index, true);
    IndexSearcher search = new IndexSearcher(reader);
    try
    {
        System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("en").CompareInfo;

        string minRP = Pad(unsignedIndex.minR);
        string maxRP = Pad(unsignedIndex.maxR);

        int numDocs = reader.NumDocs();

        // NUnit's AreEqual takes the expected value first.
        Assert.AreEqual(1 + maxId - minId, numDocs, "num of docs");

        // Base query; the "find all" assertion below expects it to match every document.
        Query q = new TermQuery(new Term("body", "body"));

        // test extremes, bounded on both ends

        int numHits = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, T, T, c), 1000).TotalHits;
        Assert.AreEqual(numDocs, numHits, "find all");

        numHits = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, T, F, c), 1000).TotalHits;
        Assert.AreEqual(numDocs - 1, numHits, "all but biggest");

        numHits = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, F, T, c), 1000).TotalHits;
        Assert.AreEqual(numDocs - 1, numHits, "all but smallest");

        numHits = search.Search(q, new TermRangeFilter("rand", minRP, maxRP, F, F, c), 1000).TotalHits;
        Assert.AreEqual(numDocs - 2, numHits, "all but extremes");

        // unbounded (a null bound is always paired with an exclusive flag here)

        numHits = search.Search(q, new TermRangeFilter("rand", minRP, null, T, F, c), 1000).TotalHits;
        Assert.AreEqual(numDocs, numHits, "smallest and up");

        numHits = search.Search(q, new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).TotalHits;
        Assert.AreEqual(numDocs, numHits, "biggest and down");

        numHits = search.Search(q, new TermRangeFilter("rand", minRP, null, F, F, c), 1000).TotalHits;
        Assert.AreEqual(numDocs - 1, numHits, "not smallest, but up");

        numHits = search.Search(q, new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).TotalHits;
        Assert.AreEqual(numDocs - 1, numHits, "not biggest, but down");

        // very small sets

        numHits = search.Search(q, new TermRangeFilter("rand", minRP, minRP, F, F, c), 1000).TotalHits;
        Assert.AreEqual(0, numHits, "min,min,F,F");

        numHits = search.Search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F, c), 1000).TotalHits;
        Assert.AreEqual(0, numHits, "max,max,F,F");

        numHits = search.Search(q, new TermRangeFilter("rand", minRP, minRP, T, T, c), 1000).TotalHits;
        Assert.AreEqual(1, numHits, "min,min,T,T");

        numHits = search.Search(q, new TermRangeFilter("rand", null, minRP, F, T, c), 1000).TotalHits;
        Assert.AreEqual(1, numHits, "nul,min,F,T");

        numHits = search.Search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T, c), 1000).TotalHits;
        Assert.AreEqual(1, numHits, "max,max,T,T");

        // Label corrected to match the flags actually passed (T, F).
        numHits = search.Search(q, new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).TotalHits;
        Assert.AreEqual(1, numHits, "max,nul,T,F");
    }
    finally
    {
        // Release both handles even when an assertion fails. The searcher was
        // built on an existing reader, so the reader is closed explicitly too.
        search.Close();
        reader.Close();
    }
}
/// <summary> Indexes a single document whose "content" term is
/// U+0633 U+0627 U+0628 and checks that a TermRangeFilter using the "ar"
/// culture's CompareInfo excludes it from [U+062F, U+0698] and includes it
/// in [U+0633, U+0638].
/// </summary>
[Test]
public virtual void TestFarsi()
{
    /* build an index */
    RAMDirectory farsiIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);
    try
    {
        Document doc = new Document();
        doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);

        writer.Optimize();
    }
    finally
    {
        // Close the writer even if indexing throws.
        writer.Close();
    }

    IndexReader reader = IndexReader.Open(farsiIndex, true);
    IndexSearcher search = new IndexSearcher(reader);
    try
    {
        Query q = new TermQuery(new Term("body", "body"));

        // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
        // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
        // characters properly.
        // NOTE(review): the remark above is worded for Java; this code uses the
        // .NET "ar" culture in its place.
        System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("ar").CompareInfo;

        // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
        // orders the U+0698 character before the U+0633 character, so the single
        // index Term below should NOT be returned by a TermRangeFilter with a Farsi
        // Collator (or an Arabic one for the case when Farsi is not supported).
        int numHits = search.Search(q, new TermRangeFilter("content", "\u062F", "\u0698", T, T, collator), 1000).TotalHits;
        Assert.AreEqual(0, numHits, "The index Term should not be included.");

        numHits = search.Search(q, new TermRangeFilter("content", "\u0633", "\u0638", T, T, collator), 1000).TotalHits;
        Assert.AreEqual(1, numHits, "The index Term should be included.");
    }
    finally
    {
        // Release both handles even when an assertion fails. The searcher was
        // built on an existing reader, so the reader is closed explicitly too.
        search.Close();
        reader.Close();
    }
}
/// <summary> Indexes the words "H\u00D8T", "H\u00C5T" and "MAND" and checks
/// that a TermRangeFilter using the "da-dk" culture's CompareInfo places
/// "H\u00C5T" strictly between "H\u00D8T" and "MAND".
/// </summary>
[Test]
public virtual void TestDanish()
{
    /* build an index */
    RAMDirectory danishIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);
    try
    {
        // Danish collation orders the words below in the given order
        // (example taken from TestSort.testInternationalSort() ).
        string[] words = new string[] { "H\u00D8T", "H\u00C5T", "MAND" };
        foreach (string word in words)
        {
            Document doc = new Document();
            doc.Add(new Field("content", word, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
        }

        writer.Optimize();
    }
    finally
    {
        // Close the writer even if indexing throws.
        writer.Close();
    }

    IndexReader reader = IndexReader.Open(danishIndex, true);
    IndexSearcher search = new IndexSearcher(reader);
    try
    {
        Query q = new TermQuery(new Term("body", "body"));

        System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("da-dk").CompareInfo;

        // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
        // but Danish collation does.
        int numHits = search.Search(q, new TermRangeFilter("content", "H\u00D8T", "MAND", F, F, collator), 1000).TotalHits;
        Assert.AreEqual(1, numHits, "The index Term should be included.");

        // With "H\u00C5T" as an exclusive lower bound, no indexed word falls inside the range.
        numHits = search.Search(q, new TermRangeFilter("content", "H\u00C5T", "MAND", F, F, collator), 1000).TotalHits;
        Assert.AreEqual(0, numHits, "The index Term should not be included.");
    }
    finally
    {
        // Release both handles even when an assertion fails. The searcher was
        // built on an existing reader, so the reader is closed explicitly too.
        search.Close();
        reader.Close();
    }
}
}
}