| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| using System; |
| using System.Collections.Generic; |
| using Lucene.Net.Support; |
| using NUnit.Framework; |
| |
| using KeywordAnalyzer = Lucene.Net.Analysis.KeywordAnalyzer; |
| using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer; |
| using Document = Lucene.Net.Documents.Document; |
| using Field = Lucene.Net.Documents.Field; |
| using SetBasedFieldSelector = Lucene.Net.Documents.SetBasedFieldSelector; |
| using IndexReader = Lucene.Net.Index.IndexReader; |
| using IndexWriter = Lucene.Net.Index.IndexWriter; |
| using Term = Lucene.Net.Index.Term; |
| using QueryParser = Lucene.Net.QueryParsers.QueryParser; |
| using Directory = Lucene.Net.Store.Directory; |
| using MockRAMDirectory = Lucene.Net.Store.MockRAMDirectory; |
| using RAMDirectory = Lucene.Net.Store.RAMDirectory; |
| using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; |
| |
| namespace Lucene.Net.Search |
| { |
| |
/// <summary>Tests the <see cref="MultiSearcher"/> class.</summary>
| [TestFixture] |
| public class TestMultiSearcher:LuceneTestCase |
| { |
/// <summary>
/// DefaultSimilarity subclass that replaces the scoring factors below with
/// constants: Idf always yields 100, every other factor yields 1.
/// </summary>
[Serializable]
private class AnonymousClassDefaultSimilarity : DefaultSimilarity
{
    // Fixture instance handed to the constructor.
    private TestMultiSearcher enclosingInstance;

    /// <param name="enclosingInstance">The test fixture that creates this similarity.</param>
    public AnonymousClassDefaultSimilarity(TestMultiSearcher enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    /// <summary>The fixture instance that was passed to the constructor.</summary>
    public TestMultiSearcher Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    // Override all scoring factors with fixed values.

    public override float Idf(int docFreq, int numDocs)
    {
        return 100.0f;
    }

    public override float Coord(int overlap, int maxOverlap)
    {
        return 1.0f;
    }

    public override float LengthNorm(System.String fieldName, int numTokens)
    {
        return 1.0f;
    }

    public override float QueryNorm(float sumOfSquaredWeights)
    {
        return 1.0f;
    }

    public override float SloppyFreq(int distance)
    {
        return 1.0f;
    }

    public override float Tf(float freq)
    {
        return 1.0f;
    }
}
| |
/// <summary>
/// Creates a new instance of the concrete MultiSearcher class used in this
/// test. The method is virtual, so a derived fixture can return another type.
/// </summary>
/// <param name="searchers">The searchers the new instance is built over.</param>
/// <returns>A new MultiSearcher constructed from <paramref name="searchers"/>.</returns>
protected internal virtual MultiSearcher GetMultiSearcherInstance(Searcher[] searchers)
{
    MultiSearcher instance = new MultiSearcher(searchers);
    return instance;
}
| |
/// <summary>
/// Runs the same query through a multi-searcher over two indices while the
/// first-listed index is (1) empty, (2) holding one document and (3) emptied
/// again by a delete, checking the hit count and loading every hit each time.
/// </summary>
[Test]
public virtual void TestEmptyIndex()
{
    // Two in-memory index stores: A receives three documents, B starts empty.
    Directory indexStoreA = new MockRAMDirectory();
    Directory indexStoreB = new MockRAMDirectory();

    // Three documents; each carries handle "1", which the main query matches.
    Document firstDoc = new Document();
    firstDoc.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
    firstDoc.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    firstDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    Document secondDoc = new Document();
    secondDoc.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED));
    secondDoc.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
    secondDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    Document thirdDoc = new Document();
    thirdDoc.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED));
    thirdDoc.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
    thirdDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    // One writer per index; nothing is written through writerB yet.
    IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);

    //--------------------------------------------------------------------
    // scenario 1: index B is empty
    //--------------------------------------------------------------------

    writerA.AddDocument(firstDoc);
    writerA.AddDocument(secondDoc);
    writerA.AddDocument(thirdDoc);
    writerA.Optimize();
    writerA.Close();

    writerB.Close();

    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(Util.Version.LUCENE_CURRENT));
    Query query = parser.Parse("handle:1");

    // VITAL STEP: the searcher for the empty index B is listed before the
    // searcher for the populated index A.
    Searcher emptyFirstSearcher = GetMultiSearcherInstance(new Searcher[]
    {
        new IndexSearcher(indexStoreB, true),
        new IndexSearcher(indexStoreA, true)
    });
    ScoreDoc[] emptyFirstHits = emptyFirstSearcher.Search(query, null, 1000).ScoreDocs;

    Assert.AreEqual(3, emptyFirstHits.Length);

    // Load every hit through the multi-searcher.
    foreach (ScoreDoc hit in emptyFirstHits)
    {
        emptyFirstSearcher.Doc(hit.Doc);
    }
    emptyFirstSearcher.Close();

    //--------------------------------------------------------------------
    // scenario 2: index B holds one document
    //--------------------------------------------------------------------

    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED);
    writerB.AddDocument(firstDoc);
    writerB.Optimize();
    writerB.Close();

    // Same ordering as before: index B at position 0, index A at position 1.
    MultiSearcher oneDocSearcher = GetMultiSearcherInstance(new Searcher[]
    {
        new IndexSearcher(indexStoreB, true),
        new IndexSearcher(indexStoreA, true)
    });
    ScoreDoc[] oneDocHits = oneDocSearcher.Search(query, null, 1000).ScoreDocs;

    Assert.AreEqual(4, oneDocHits.Length);

    // No exception should be raised while loading the hits.
    foreach (ScoreDoc hit in oneDocHits)
    {
        oneDocSearcher.Doc(hit.Doc);
    }

    // SubSearcher() maps a hit's document number to the position of the
    // sub-searcher it came from: "doc1" lives in both indices...
    ScoreDoc[] doc1Hits = oneDocSearcher.Search(parser.Parse("id:doc1"), null, 1000).ScoreDocs;
    Assert.AreEqual(2, doc1Hits.Length);
    Assert.AreEqual(0, oneDocSearcher.SubSearcher(doc1Hits[0].Doc)); // hit from index B (position 0)
    Assert.AreEqual(1, oneDocSearcher.SubSearcher(doc1Hits[1].Doc)); // hit from index A (position 1)

    // ...while "doc2" was only written to index A.
    ScoreDoc[] doc2Hits = oneDocSearcher.Search(parser.Parse("id:doc2"), null, 1000).ScoreDocs;
    Assert.AreEqual(1, doc2Hits.Length);
    Assert.AreEqual(1, oneDocSearcher.SubSearcher(doc2Hits[0].Doc)); // hit from index A (position 1)
    oneDocSearcher.Close();

    //--------------------------------------------------------------------
    // scenario 3: the document in index B is deleted again
    //--------------------------------------------------------------------

    // Delete the document that scenario 2 added to index B.
    Term doc1Term = new Term("id", "doc1");
    IndexReader readerB = IndexReader.Open(indexStoreB, false);
    readerB.DeleteDocuments(doc1Term);
    readerB.Close();

    // Optimize index B through a writer after the delete.
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED);
    writerB.Optimize();
    writerB.Close();

    Searcher afterDeleteSearcher = GetMultiSearcherInstance(new Searcher[]
    {
        new IndexSearcher(indexStoreB, true),
        new IndexSearcher(indexStoreA, true)
    });
    ScoreDoc[] afterDeleteHits = afterDeleteSearcher.Search(query, null, 1000).ScoreDocs;

    Assert.AreEqual(3, afterDeleteHits.Length);

    foreach (ScoreDoc hit in afterDeleteHits)
    {
        afterDeleteSearcher.Doc(hit.Doc);
    }
    afterDeleteSearcher.Close();
    indexStoreA.Close();
    indexStoreB.Close();
}
| |
/// <summary>
/// Builds a document with a "contents" field, an "other" field holding
/// "other contents" and, optionally, a second "contents" field.
/// </summary>
/// <param name="contents1">Value of the first "contents" field.</param>
/// <param name="contents2">Value of the second "contents" field; null to leave it out.</param>
/// <returns>The populated document.</returns>
private static Document CreateDocument(System.String contents1, System.String contents2)
{
    Document doc = new Document();
    doc.Add(new Field("contents", contents1, Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("other", "other contents", Field.Store.YES, Field.Index.NOT_ANALYZED));

    if (contents2 == null)
    {
        return doc;
    }

    doc.Add(new Field("contents", contents2, Field.Store.YES, Field.Index.NOT_ANALYZED));
    return doc;
}
| |
/// <summary>
/// Writes <paramref name="nDocs"/> documents, whose first "contents" values
/// are "doc0", "doc1", ..., into <paramref name="directory"/>.
/// </summary>
/// <param name="directory">Index location to write to.</param>
/// <param name="nDocs">Number of documents to add.</param>
/// <param name="create">Passed to the IndexWriter constructor as its create flag.</param>
/// <param name="contents2">Optional second "contents" value for every document; may be null.</param>
private static void InitIndex(Directory directory, int nDocs, bool create, System.String contents2)
{
    IndexWriter writer = new IndexWriter(directory, new KeywordAnalyzer(), create, IndexWriter.MaxFieldLength.LIMITED);
    try
    {
        int docNumber = 0;
        while (docNumber < nDocs)
        {
            writer.AddDocument(CreateDocument("doc" + docNumber, contents2));
            docNumber++;
        }
    }
    finally
    {
        // The writer is closed even if adding a document fails.
        writer.Close();
    }
}
| |
/// <summary>
/// Checks that loading a document through the multi-searcher with a
/// SetBasedFieldSelector returns only the selected field, for a hit from
/// each of the two sub-indices.
/// </summary>
[Test]
public virtual void TestFieldSelector()
{
    RAMDirectory ramDirectory1 = new RAMDirectory();
    RAMDirectory ramDirectory2 = new RAMDirectory();
    try
    {
        Query query = new TermQuery(new Term("contents", "doc0"));

        // Put each document set into its own index.
        InitIndex(ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
        InitIndex(ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

        IndexSearcher indexSearcher1 = new IndexSearcher(ramDirectory1, true);
        IndexSearcher indexSearcher2 = new IndexSearcher(ramDirectory2, true);

        MultiSearcher searcher = GetMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2});
        Assert.IsNotNull(searcher, "searcher is null and it shouldn't be");
        try
        {
            // One hit is expected from each directory.
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.IsNotNull(hits, "hits is null and it shouldn't be");
            Assert.AreEqual(2, hits.Length, hits.Length + " does not equal: " + 2);

            // Without a selector the first hit is loaded in full.
            Document document = searcher.Doc(hits[0].Doc);
            Assert.IsNotNull(document, "document is null and it shouldn't be");
            Assert.AreEqual(2, document.GetFields().Count, "document.getFields() Size: " + document.GetFields().Count + " is not: " + 2);

            // Select only "other": "contents" must be absent from the loaded document.
            ISet<string> ftl = Support.Compatibility.SetFactory.GetSet<string>();
            ftl.Add("other");
            SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Support.Compatibility.SetFactory.GetSet<string>());
            document = searcher.Doc(hits[0].Doc, fs);
            Assert.IsNotNull(document, "document is null and it shouldn't be");
            Assert.AreEqual(1, document.GetFields().Count, "document.getFields() Size: " + document.GetFields().Count + " is not: " + 1);
            System.String value_Renamed = document.Get("contents");
            Assert.IsNull(value_Renamed, "value is not null and it should be");
            value_Renamed = document.Get("other");
            Assert.IsNotNull(value_Renamed, "value is null and it shouldn't be");

            // Select only "contents" for the second hit: "other" must be absent.
            ftl.Clear();
            ftl.Add("contents");
            fs = new SetBasedFieldSelector(ftl, Support.Compatibility.SetFactory.GetSet<string>());
            document = searcher.Doc(hits[1].Doc, fs);
            value_Renamed = document.Get("contents");
            Assert.IsNotNull(value_Renamed, "value is null and it shouldn't be");
            value_Renamed = document.Get("other");
            Assert.IsNull(value_Renamed, "value is not null and it should be");
        }
        finally
        {
            // Mirrors TestNormalization, which closes only the multi-searcher
            // (presumably that also closes the wrapped searchers -- confirm).
            searcher.Close();
        }
    }
    finally
    {
        ramDirectory1.Close();
        ramDirectory2.Close();
    }
}
| |
| /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0 |
| public void testNormalization1() throws IOException { |
| testNormalization(1, "Using 1 document per index:"); |
| } |
| */ |
| |
/// <summary>Runs the normalization check with 10 documents per index.</summary>
[Test]
public virtual void TestNormalization10()
{
    const int docsPerIndex = 10;
    TestNormalization(docsPerIndex, "Using 10 documents per index:");
}
| |
/// <summary>
/// Records the two scores a query gets when both document sets share one
/// index, then checks that a multi-searcher over two separate indices
/// reports the same scores, with and without Sort.RELEVANCE.
/// </summary>
/// <param name="nDocs">Number of documents written per InitIndex call.</param>
/// <param name="message">Message attached to every assertion.</param>
private void TestNormalization(int nDocs, System.String message)
{
    Query query = new TermQuery(new Term("contents", "doc0"));

    // Phase 1: both document sets go into the same index.
    RAMDirectory combinedDirectory = new MockRAMDirectory();
    InitIndex(combinedDirectory, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
    InitIndex(combinedDirectory, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

    IndexSearcher combinedSearcher = new IndexSearcher(combinedDirectory, true);
    combinedSearcher.SetDefaultFieldSortScoring(true, true);

    ScoreDoc[] combinedHits = combinedSearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, combinedHits.Length, message);

    // Keep the reference scores for the comparisons in phase 2.
    float firstScore = combinedHits[0].Score;
    float secondScore = combinedHits[1].Score;
    Assert.IsTrue(firstScore > secondScore, message);

    combinedSearcher.Close();
    combinedDirectory.Close();

    // Phase 2: each document set goes into its own index.
    RAMDirectory singleTokenDirectory = new MockRAMDirectory();
    RAMDirectory twoTokenDirectory = new MockRAMDirectory();
    InitIndex(singleTokenDirectory, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
    InitIndex(twoTokenDirectory, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

    IndexSearcher singleTokenSearcher = new IndexSearcher(singleTokenDirectory, true);
    singleTokenSearcher.SetDefaultFieldSortScoring(true, true);
    IndexSearcher twoTokenSearcher = new IndexSearcher(twoTokenDirectory, true);
    twoTokenSearcher.SetDefaultFieldSortScoring(true, true);

    Searcher searcher = GetMultiSearcherInstance(new Searcher[]{singleTokenSearcher, twoTokenSearcher});

    ScoreDoc[] multiHits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, multiHits.Length, message);

    // The scores should be the same (within reason).
    Assert.AreEqual(firstScore, multiHits[0].Score, 1e-6, message); // expected to be the document from the single-token index
    Assert.AreEqual(secondScore, multiHits[1].Score, 1e-6, message); // expected to be the document from the two-token index

    // Adding a Sort.RELEVANCE object should not change anything.
    ScoreDoc[] sortedHits = searcher.Search(query, null, 1000, Sort.RELEVANCE).ScoreDocs;
    Assert.AreEqual(2, sortedHits.Length, message);

    Assert.AreEqual(firstScore, sortedHits[0].Score, 1e-6, message); // expected to be the document from the single-token index
    Assert.AreEqual(secondScore, sortedHits[1].Score, 1e-6, message); // expected to be the document from the two-token index

    searcher.Close();

    singleTokenDirectory.Close();
    twoTokenDirectory.Close();
}
| |
/// <summary>
/// Tests that a custom similarity is in effect when using MultiSearcher
/// (LUCENE-789): with the same similarity set on both, an IndexSearcher and a
/// MultiSearcher wrapping it must report the same maximum score.
/// </summary>
[Test]
public virtual void TestCustomSimilarity()
{
    RAMDirectory dir = new RAMDirectory();
    try
    {
        InitIndex(dir, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
        IndexSearcher srchr = new IndexSearcher(dir, true);
        MultiSearcher msrchr = GetMultiSearcherInstance(new Searcher[]{srchr});
        try
        {
            Similarity customSimilarity = new AnonymousClassDefaultSimilarity(this);

            srchr.Similarity = customSimilarity;
            msrchr.Similarity = customSimilarity;

            Query query = new TermQuery(new Term("contents", "doc0"));

            // Get a score from IndexSearcher
            TopDocs topDocs = srchr.Search(query, null, 1);
            float score1 = topDocs.MaxScore;

            // Get the score from MultiSearcher
            topDocs = msrchr.Search(query, null, 1);
            float scoreN = topDocs.MaxScore;

            // The scores from the IndexSearcher and MultiSearcher should be the same
            // if the same similarity is used.
            Assert.AreEqual(score1, scoreN, 1e-6, "MultiSearcher score must be equal to single searcher score!");
        }
        finally
        {
            // Mirrors TestNormalization, which closes only the multi-searcher
            // (presumably that also closes the wrapped searcher -- confirm).
            msrchr.Close();
        }
    }
    finally
    {
        dir.Close();
    }
}
| |
/// <summary>
/// Checks that MultiSearcher.DocFreq adds up the document frequencies of its
/// sub-searchers: the "contents" value "x" is written to all 10 documents of
/// the first index and all 5 of the second, so 15 is expected.
/// </summary>
[Test]
public void TestDocFreq()
{
    RAMDirectory dir1 = new RAMDirectory();
    RAMDirectory dir2 = new RAMDirectory();
    try
    {
        InitIndex(dir1, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
        InitIndex(dir2, 5, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
        IndexSearcher searcher1 = new IndexSearcher(dir1, true);
        IndexSearcher searcher2 = new IndexSearcher(dir2, true);

        MultiSearcher multiSearcher = GetMultiSearcherInstance(new Searcher[] { searcher1, searcher2 });
        try
        {
            Assert.AreEqual(15, multiSearcher.DocFreq(new Term("contents", "x")));
        }
        finally
        {
            // Mirrors TestNormalization, which closes only the multi-searcher
            // (presumably that also closes the wrapped searchers -- confirm).
            multiSearcher.Close();
        }
    }
    finally
    {
        dir1.Close();
        dir2.Close();
    }
}
| } |
| } |