using System.Collections.Generic;
using Lucene.Net.Documents;
using Lucene.Net.Index.Extensions;
using NUnit.Framework;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Search
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Analyzer = Lucene.Net.Analysis.Analyzer;
using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
using ChildScorer = Lucene.Net.Search.Scorer.ChildScorer;
using Directory = Lucene.Net.Store.Directory;
using Document = Documents.Document;
using IndexReader = Lucene.Net.Index.IndexReader;
using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
using Store = Field.Store;
using Term = Lucene.Net.Index.Term;
using TextField = Lucene.Net.Documents.TextField;
// TODO: refactor to a base class that collects freqs from the scorer tree
// and test all queries with it
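// Verifies that the sub-scorers of a BooleanQuery are reachable through
// Scorer.GetChildren(), by summing per-document term frequencies from the
// TermQuery leaf scorers while hits are being collected.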
[TestFixture]
public class TestBooleanQueryVisitSubscorers : LuceneTestCase
{
internal Analyzer Analyzer;
internal IndexReader Reader;
internal IndexSearcher Searcher;
internal Directory Dir;
internal const string F1 = "title";
internal const string F2 = "body";
[SetUp]
public override void SetUp()
{
base.SetUp();
Analyzer = new MockAnalyzer(Random);
Dir = NewDirectory();
IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, Analyzer);
config.SetMergePolicy(NewLogMergePolicy()); // we will use docids to validate
RandomIndexWriter writer = new RandomIndexWriter(Random, Dir, config);
writer.AddDocument(Doc("lucene", "lucene is a very popular search engine library"));
writer.AddDocument(Doc("solr", "solr is a very popular search server and is using lucene"));
writer.AddDocument(Doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
Reader = writer.GetReader();
writer.Dispose();
Searcher = NewSearcher(Reader);
}
[TearDown]
public override void TearDown()
{
Reader.Dispose();
Dir.Dispose();
base.TearDown();
}
[Test]
public virtual void TestDisjunctions()
{
BooleanQuery bq = new BooleanQuery();
bq.Add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
bq.Add(new TermQuery(new Term(F2, "lucene")), Occur.SHOULD);
bq.Add(new TermQuery(new Term(F2, "search")), Occur.SHOULD);
IDictionary<int, int> tfs = GetDocCounts(Searcher, bq);
Assert.AreEqual(3, tfs.Count); // 3 documents
Assert.AreEqual(3, (int)tfs[0]); // f1:lucene + f2:lucene + f2:search
Assert.AreEqual(2, (int)tfs[1]); // f2:search + f2:lucene
Assert.AreEqual(2, (int)tfs[2]); // f2:search + f2:lucene
}
[Test]
public virtual void TestNestedDisjunctions()
{
BooleanQuery bq = new BooleanQuery();
bq.Add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
BooleanQuery bq2 = new BooleanQuery();
bq2.Add(new TermQuery(new Term(F2, "lucene")), Occur.SHOULD);
bq2.Add(new TermQuery(new Term(F2, "search")), Occur.SHOULD);
bq.Add(bq2, Occur.SHOULD);
IDictionary<int, int> tfs = GetDocCounts(Searcher, bq);
Assert.AreEqual(3, tfs.Count); // 3 documents
Assert.AreEqual(3, (int)tfs[0]); // f1:lucene + f2:lucene + f2:search
Assert.AreEqual(2, (int)tfs[1]); // f2:search + f2:lucene
Assert.AreEqual(2, (int)tfs[2]); // f2:search + f2:lucene
}
[Test]
public virtual void TestConjunctions()
{
BooleanQuery bq = new BooleanQuery();
bq.Add(new TermQuery(new Term(F2, "lucene")), Occur.MUST);
bq.Add(new TermQuery(new Term(F2, "is")), Occur.MUST);
IDictionary<int, int> tfs = GetDocCounts(Searcher, bq);
Assert.AreEqual(3, tfs.Count); // 3 documents
Assert.AreEqual(2, (int)tfs[0]); // f2:lucene + f2:is
Assert.AreEqual(3, (int)tfs[1]); // f2:is + f2:is + f2:lucene
Assert.AreEqual(3, (int)tfs[2]); // f2:is + f2:is + f2:lucene
}
internal static Document Doc(string v1, string v2)
{
Document doc = new Document();
doc.Add(new TextField(F1, v1, Store.YES));
doc.Add(new TextField(F2, v2, Store.YES));
return doc;
}
internal static IDictionary<int, int> GetDocCounts(IndexSearcher searcher, Query query)
{
MyCollector collector = new MyCollector();
searcher.Search(query, collector);
return collector.DocCounts;
}
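// Collector that delegates to a TopScoreDocCollector and, for every collected
// document, sums the Freq of the TermQuery leaf scorers found by walking the
// scorer tree via Scorer.GetChildren().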
internal class MyCollector : ICollector
{
internal TopDocsCollector<ScoreDoc> Collector;
internal int DocBase;
public readonly IDictionary<int, int> DocCounts = new Dictionary<int, int>();
internal readonly ISet<Scorer> TqsSet = new JCG.HashSet<Scorer>();
internal MyCollector()
{
Collector = TopScoreDocCollector.Create(10, true);
}
public virtual bool AcceptsDocsOutOfOrder
{
get { return false; }
}
public virtual void Collect(int doc)
{
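// Sum the term frequencies of all leaf scorers currently positioned on this doc.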
int freq = 0;
foreach (Scorer scorer in TqsSet)
{
if (doc == scorer.DocID)
{
freq += scorer.Freq;
}
}
DocCounts[doc + DocBase] = freq;
Collector.Collect(doc);
}
public virtual void SetNextReader(AtomicReaderContext context)
{
this.DocBase = context.DocBase;
Collector.SetNextReader(context);
}
public virtual void SetScorer(Scorer scorer)
{
Collector.SetScorer(scorer);
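// Re-collect the TermQuery leaf scorers for the new scorer tree.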
TqsSet.Clear();
FillLeaves(scorer, TqsSet);
}
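// Recursively walks the scorer tree via GetChildren(), adding every scorer
// whose query is a TermQuery to the given set.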
internal virtual void FillLeaves(Scorer scorer, ISet<Scorer> set)
{
if (scorer.Weight.Query is TermQuery)
{
set.Add(scorer);
}
else
{
foreach (ChildScorer child in scorer.GetChildren())
{
FillLeaves(child.Child, set);
}
}
}
public virtual TopDocs GetTopDocs()
{
return Collector.GetTopDocs();
}
public virtual int Freq(int doc)
{
return DocCounts[doc];
}
}
}
}