| using J2N.Collections.Generic.Extensions; |
| using Lucene.Net.Support; |
| using NUnit.Framework; |
| using System; |
| using System.Collections.Generic; |
| using System.Globalization; |
| using Assert = Lucene.Net.TestFramework.Assert; |
| |
| namespace Lucene.Net.Facet.Taxonomy |
| { |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| |
| using DirectoryReader = Lucene.Net.Index.DirectoryReader; |
| using DirectoryTaxonomyReader = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyReader; |
| using DirectoryTaxonomyWriter = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter; |
| using Document = Lucene.Net.Documents.Document; |
| using IndexSearcher = Lucene.Net.Search.IndexSearcher; |
| using IndexWriter = Lucene.Net.Index.IndexWriter; |
| using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig; |
| using IOUtils = Lucene.Net.Util.IOUtils; |
| using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery; |
| using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; |
| using Store = Lucene.Net.Documents.Field.Store; |
| using StringField = Lucene.Net.Documents.StringField; |
| using Term = Lucene.Net.Index.Term; |
| using TermQuery = Lucene.Net.Search.TermQuery; |
| [TestFixture] |
| public class TestTaxonomyFacetCounts2 : FacetTestCase |
| { |
| |
| // Term added to every document that has "content"; TestDifferentNumResults queries on it. |
| private static readonly Term A = new Term("f", "a"); |
| |
| // Facet dimension names. |
| private const string CP_A = "A"; |
| private const string CP_B = "B"; |
| private const string CP_C = "C"; // indexed w/ NO_PARENTS |
| private const string CP_D = "D"; // indexed w/ NO_PARENTS |
| |
| // Number of child categories created under each dimension. |
| private const int NUM_CHILDREN_CP_A = 5; |
| private const int NUM_CHILDREN_CP_B = 3; |
| private const int NUM_CHILDREN_CP_C = 5; |
| private const int NUM_CHILDREN_CP_D = 5; |
| |
| // Pre-built facet fields for each dimension; populated by the static constructor. |
| private static readonly FacetField[] CATEGORIES_A; |
| private static readonly FacetField[] CATEGORIES_B; |
| private static readonly FacetField[] CATEGORIES_C; |
| private static readonly FacetField[] CATEGORIES_D; |
| |
| /// <summary> |
| /// Builds the fixed category arrays used by every test: single-level |
| /// categories for dimensions A, B and C, and two-level categories for D. |
| /// </summary> |
| [System.Diagnostics.CodeAnalysis.SuppressMessage("Performance", "CA1810:Initialize reference type static fields inline", Justification = "Complexity")] |
| static TestTaxonomyFacetCounts2() |
| { |
| CATEGORIES_A = new FacetField[NUM_CHILDREN_CP_A]; |
| CATEGORIES_B = new FacetField[NUM_CHILDREN_CP_B]; |
| CATEGORIES_C = new FacetField[NUM_CHILDREN_CP_C]; // NO_PARENTS categories |
| CATEGORIES_D = new FacetField[NUM_CHILDREN_CP_D]; // Multi-level categories |
| |
| for (int ord = 0; ord < CATEGORIES_A.Length; ord++) |
| { |
| CATEGORIES_A[ord] = new FacetField(CP_A, ord.ToString(CultureInfo.InvariantCulture)); |
| } |
| |
| for (int ord = 0; ord < CATEGORIES_B.Length; ord++) |
| { |
| CATEGORIES_B[ord] = new FacetField(CP_B, ord.ToString(CultureInfo.InvariantCulture)); |
| } |
| |
| for (int ord = 0; ord < CATEGORIES_C.Length; ord++) |
| { |
| CATEGORIES_C[ord] = new FacetField(CP_C, ord.ToString(CultureInfo.InvariantCulture)); |
| } |
| |
| for (int ord = 0; ord < CATEGORIES_D.Length; ord++) |
| { |
| // The second path component is the first one doubled, e.g. D/1/11, D/2/22... |
| string label = ord.ToString(CultureInfo.InvariantCulture); |
| CATEGORIES_D[ord] = new FacetField(CP_D, label, label + label); |
| } |
| } |
| |
| // Directories shared by all tests: created in BeforeClass and disposed in AfterClass. |
| private static Net.Store.Directory indexDir; |
| private static Net.Store.Directory taxoDir; |
| |
| // Expected counts keyed by "DIM" or "DIM/child": allExpectedCounts covers every document with facets, |
| // termExpectedCounts only those documents that were also given the term A. |
| private static IDictionary<string, int?> allExpectedCounts; |
| private static IDictionary<string, int?> termExpectedCounts; |
| |
| /// <summary> |
| /// Disposes the shared index and taxonomy directories, then runs the base class teardown. |
| /// </summary> |
| [OneTimeTearDown] |
| public override void AfterClass() // LUCENENET specific - renamed from AfterClassCountingFacetsAggregatorTest() to ensure calling order |
| { |
| try |
| { |
| IOUtils.Dispose(indexDir, taxoDir); |
| } |
| finally |
| { |
| // Always run the base teardown, even when disposing a directory throws. |
| base.AfterClass(); |
| } |
| } |
| |
| /// <summary> |
| /// Picks the facet fields for one document: 1-3 distinct categories from dimension A, |
| /// 1-2 distinct categories from dimension B, and exactly one category each from C and D. |
| /// </summary> |
| /// <param name="random">Source of randomness for every choice made by this method.</param> |
| /// <returns>The chosen facet fields, in the order A, B, C, D.</returns> |
| private static IList<FacetField> RandomCategories(Random random) |
| { |
| // add random categories from the two dimensions, ensuring that the same |
| // category is not added twice. |
| int numFacetsA = random.Next(3) + 1; // 1-3 |
| int numFacetsB = random.Next(2) + 1; // 1-2 |
| |
| // Shuffle private copies so the shared static arrays are never reordered. |
| // The supplied random is used throughout (rather than the ambient test random) so the |
| // parameter is actually honored. |
| List<FacetField> categories_a = new List<FacetField>(CATEGORIES_A); |
| List<FacetField> categories_b = new List<FacetField>(CATEGORIES_B); |
| categories_a.Shuffle(random); |
| categories_b.Shuffle(random); |
| |
| // Taking a prefix of each shuffled list yields distinct categories. |
| List<FacetField> categories = new List<FacetField>(); |
| categories.AddRange(categories_a.SubList(0, numFacetsA)); |
| categories.AddRange(categories_b.SubList(0, numFacetsB)); |
| |
| // add the NO_PARENT categories |
| categories.Add(CATEGORIES_C[random.Next(NUM_CHILDREN_CP_C)]); |
| categories.Add(CATEGORIES_D[random.Next(NUM_CHILDREN_CP_D)]); |
| |
| return categories; |
| } |
| |
| /// <summary> |
| /// Adds the unstored string field for term <c>A</c> to <paramref name="doc"/>, so that a |
| /// TermQuery on A matches the document. |
| /// </summary> |
| private static void AddField(Document doc) |
| { |
| StringField termField = new StringField(A.Field, A.Text(), Store.NO); |
| doc.Add(termField); |
| } |
| |
| /// <summary> |
| /// Adds a random set of facet fields to <paramref name="doc"/> and records them in the expected counts. |
| /// </summary> |
| /// <param name="doc">Document receiving the facet fields.</param> |
| /// <param name="config">Not used by this method.</param> |
| /// <param name="updateTermExpectedCounts"> |
| /// When true, termExpectedCounts is incremented alongside allExpectedCounts. |
| /// </param> |
| private static void AddFacets(Document doc, FacetsConfig config, bool updateTermExpectedCounts) |
| { |
| foreach (FacetField field in RandomCategories(Random)) |
| { |
| doc.Add(field); |
| |
| // Counts are tracked per "DIM/firstPathComponent". |
| string key = field.Dim + "/" + field.Path[0]; |
| allExpectedCounts[key] += 1; |
| if (updateTermExpectedCounts) |
| { |
| termExpectedCounts[key] += 1; |
| } |
| } |
| |
| // Each document counts once towards the dimension-level totals of B, C and D. |
| // Dimension A is not tracked here; the tests assert -1 for it. |
| foreach (string dim in new[] { CP_B, CP_C, CP_D }) |
| { |
| allExpectedCounts[dim] += 1; |
| if (updateTermExpectedCounts) |
| { |
| termExpectedCounts[dim] += 1; |
| } |
| } |
| } |
| |
| /// <summary> |
| /// Creates a new <see cref="FacetsConfig"/> on every access: A and B are multi-valued, |
| /// B additionally requires a dimension count, and D is hierarchical. |
| /// </summary> |
| private static FacetsConfig Config |
| { |
| get |
| { |
| FacetsConfig facetsConfig = new FacetsConfig(); |
| |
| // Multi-valued dimensions. |
| facetsConfig.SetMultiValued("A", true); |
| facetsConfig.SetMultiValued("B", true); |
| |
| // Per-dimension extras. |
| facetsConfig.SetRequireDimCount("B", true); |
| facetsConfig.SetHierarchical("D", true); |
| |
| return facetsConfig; |
| } |
| } |
| |
| /// <summary> |
| /// Indexes AtLeast(2) documents that carry only the term field (no facets), then commits. |
| /// </summary> |
| private static void IndexDocsNoFacets(IndexWriter indexWriter) |
| { |
| for (int remaining = AtLeast(2); remaining > 0; remaining--) |
| { |
| Document termOnlyDoc = new Document(); |
| AddField(termOnlyDoc); |
| indexWriter.AddDocument(termOnlyDoc); |
| } |
| |
| indexWriter.Commit(); // flush a segment |
| } |
| |
| /// <summary> |
| /// Indexes AtLeast(2) documents that carry facets but not the term field, then commits. |
| /// Only allExpectedCounts is updated for these documents. |
| /// </summary> |
| /// <param name="indexWriter">Writer receiving the built documents.</param> |
| /// <param name="taxoWriter">Taxonomy writer used when building the facet fields.</param> |
| /// <param name="expectedCounts">Not used by this method.</param> |
| private static void IndexDocsWithFacetsNoTerms(IndexWriter indexWriter, ITaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts) |
| { |
| Random rnd = Random; |
| int docCount = AtLeast(rnd, 2); |
| FacetsConfig facetsConfig = Config; |
| |
| for (int remaining = docCount; remaining > 0; remaining--) |
| { |
| Document facetDoc = new Document(); |
| AddFacets(facetDoc, facetsConfig, false); |
| Document builtDoc = facetsConfig.Build(taxoWriter, facetDoc); |
| indexWriter.AddDocument(builtDoc); |
| } |
| |
| indexWriter.Commit(); // flush a segment |
| } |
| |
| /// <summary> |
| /// Indexes AtLeast(2) documents that carry both facets and the term field, then commits. |
| /// Both allExpectedCounts and termExpectedCounts are updated for these documents. |
| /// </summary> |
| /// <param name="indexWriter">Writer receiving the built documents.</param> |
| /// <param name="taxoWriter">Taxonomy writer used when building the facet fields.</param> |
| /// <param name="expectedCounts">Not used by this method.</param> |
| private static void IndexDocsWithFacetsAndTerms(IndexWriter indexWriter, ITaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts) |
| { |
| Random rnd = Random; |
| int docCount = AtLeast(rnd, 2); |
| FacetsConfig facetsConfig = Config; |
| |
| for (int remaining = docCount; remaining > 0; remaining--) |
| { |
| Document facetDoc = new Document(); |
| // Facets are added before the term field, as in the original ordering. |
| AddFacets(facetDoc, facetsConfig, true); |
| AddField(facetDoc); |
| Document builtDoc = facetsConfig.Build(taxoWriter, facetDoc); |
| indexWriter.AddDocument(builtDoc); |
| } |
| |
| indexWriter.Commit(); // flush a segment |
| } |
| |
| /// <summary> |
| /// Indexes AtLeast(2) documents that all carry facets, each of which randomly does or does not |
| /// carry the term field, then commits. termExpectedCounts is updated only for documents with the term. |
| /// </summary> |
| /// <param name="indexWriter">Writer receiving the built documents.</param> |
| /// <param name="taxoWriter">Taxonomy writer used when building the facet fields.</param> |
| /// <param name="expectedCounts">Not used by this method.</param> |
| private static void IndexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, ITaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts) |
| { |
| Random rnd = Random; |
| int docCount = AtLeast(rnd, 2); |
| FacetsConfig facetsConfig = Config; |
| |
| for (int remaining = docCount; remaining > 0; remaining--) |
| { |
| Document facetDoc = new Document(); |
| |
| // Decide first whether this document gets the term, then add the facets. |
| bool withTerm = rnd.NextBoolean(); |
| if (withTerm) |
| { |
| AddField(facetDoc); |
| } |
| AddFacets(facetDoc, facetsConfig, withTerm); |
| |
| Document builtDoc = facetsConfig.Build(taxoWriter, facetDoc); |
| indexWriter.AddDocument(builtDoc); |
| } |
| |
| indexWriter.Commit(); // flush a segment |
| } |
| |
| /// <summary> |
| /// Creates an expected-counts map in which every dimension key ("A".."D") and every |
| /// "DIM/child" key of the pre-built categories starts at 0. |
| /// </summary> |
| private static IDictionary<string, int?> newCounts() |
| { |
| IDictionary<string, int?> zeroed = new Dictionary<string, int?>(); |
| |
| // Dimension-level entries first. |
| foreach (string dim in new[] { CP_A, CP_B, CP_C, CP_D }) |
| { |
| zeroed[dim] = 0; |
| } |
| |
| // Then one entry per category, keyed by "DIM/firstPathComponent". |
| foreach (FacetField[] group in new[] { CATEGORIES_A, CATEGORIES_B, CATEGORIES_C, CATEGORIES_D }) |
| { |
| foreach (FacetField ff in group) |
| { |
| zeroed[ff.Dim + "/" + ff.Path[0]] = 0; |
| } |
| } |
| |
| return zeroed; |
| } |
| |
| /// <summary> |
| /// Builds the shared index and taxonomy once for the whole fixture and records the expected counts. |
| /// </summary> |
| [OneTimeSetUp] |
| public override void BeforeClass() // LUCENENET specific - renamed from BeforeClassCountingFacetsAggregatorTest() to ensure calling order |
| { |
| base.BeforeClass(); |
| |
| indexDir = NewDirectory(); |
| taxoDir = NewDirectory(); |
| |
| // The index is written as four commits: |
| // 1. Segment with no categories, but matching results |
| // 2. Segment w/ categories, but no results |
| // 3. Segment w/ categories and results |
| // 4. Segment w/ categories, but only some results |
| |
| IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); |
| //writerConfig.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges, so we can control the index segments |
| IndexWriter writer = new IndexWriter(indexDir, writerConfig); |
| ITaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir); |
| |
| // Both maps start with every key at 0 and are updated while indexing. |
| allExpectedCounts = newCounts(); |
| termExpectedCounts = newCounts(); |
| |
| IndexDocsNoFacets(writer); // 1. no categories |
| IndexDocsWithFacetsNoTerms(writer, taxonomyWriter, allExpectedCounts); // 2. categories, no content |
| IndexDocsWithFacetsAndTerms(writer, taxonomyWriter, allExpectedCounts); // 3. categories and content |
| IndexDocsWithFacetsAndSomeTerms(writer, taxonomyWriter, allExpectedCounts); // 4. categories and some content |
| |
| IOUtils.Dispose(writer, taxonomyWriter); |
| } |
| |
| /// <summary> |
| /// Searches for term A and checks the counts of dimensions A and B against termExpectedCounts. |
| /// </summary> |
| [Test] |
| public virtual void TestDifferentNumResults() |
| { |
| // test the collector w/ FacetRequests and different numResults |
| // The readers are opened in using statements so they are released even when an assertion fails. |
| using (DirectoryReader indexReader = DirectoryReader.Open(indexDir)) |
| using (var taxoReader = new DirectoryTaxonomyReader(taxoDir)) |
| { |
| IndexSearcher searcher = NewSearcher(indexReader); |
| |
| FacetsCollector sfc = new FacetsCollector(); |
| TermQuery q = new TermQuery(A); |
| searcher.Search(q, sfc); |
| Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc); |
| |
| // Dimension A: the dimension-level value is expected to be -1. |
| FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_A, CP_A); |
| Assert.AreEqual(-1, (int)result.Value); |
| foreach (LabelAndValue labelValue in result.LabelValues) |
| { |
| Assert.AreEqual(termExpectedCounts[CP_A + "/" + labelValue.Label].GetValueOrDefault(), labelValue.Value); |
| } |
| |
| // Dimension B: the dimension-level value must match the tracked count. |
| result = facets.GetTopChildren(NUM_CHILDREN_CP_B, CP_B); |
| Assert.AreEqual(termExpectedCounts[CP_B].GetValueOrDefault(), result.Value); |
| foreach (LabelAndValue labelValue in result.LabelValues) |
| { |
| Assert.AreEqual(termExpectedCounts[CP_B + "/" + labelValue.Label].GetValueOrDefault(), labelValue.Value); |
| } |
| } |
| } |
| |
| /// <summary> |
| /// Matches all documents and checks the counts of dimensions A and B against allExpectedCounts, |
| /// also verifying that child results are returned in non-increasing order of value. |
| /// </summary> |
| [Test] |
| public virtual void TestAllCounts() |
| { |
| // The readers are opened in using statements so they are released even when an assertion fails. |
| using (DirectoryReader indexReader = DirectoryReader.Open(indexDir)) |
| using (var taxoReader = new DirectoryTaxonomyReader(taxoDir)) |
| { |
| IndexSearcher searcher = NewSearcher(indexReader); |
| |
| FacetsCollector sfc = new FacetsCollector(); |
| searcher.Search(new MatchAllDocsQuery(), sfc); |
| |
| Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc); |
| |
| // Dimension A: the dimension-level value is expected to be -1. |
| FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_A, CP_A); |
| Assert.AreEqual(-1, (int)result.Value); |
| int prevValue = int.MaxValue; |
| foreach (LabelAndValue labelValue in result.LabelValues) |
| { |
| Assert.AreEqual(allExpectedCounts[CP_A + "/" + labelValue.Label].GetValueOrDefault(), labelValue.Value); |
| Assert.True((int)labelValue.Value <= prevValue, "wrong sort order of sub results: labelValue.value=" + labelValue.Value + " prevValue=" + prevValue); |
| prevValue = (int)labelValue.Value; |
| } |
| |
| // Dimension B: the dimension-level value must match the tracked count. |
| result = facets.GetTopChildren(NUM_CHILDREN_CP_B, CP_B); |
| Assert.AreEqual(allExpectedCounts[CP_B].GetValueOrDefault(), result.Value); |
| prevValue = int.MaxValue; |
| foreach (LabelAndValue labelValue in result.LabelValues) |
| { |
| Assert.AreEqual(allExpectedCounts[CP_B + "/" + labelValue.Label].GetValueOrDefault(), labelValue.Value); |
| Assert.True((int)labelValue.Value <= prevValue, "wrong sort order of sub results: labelValue.value=" + labelValue.Value + " prevValue=" + prevValue); |
| prevValue = (int)labelValue.Value; |
| } |
| } |
| } |
| |
| /// <summary> |
| /// Matches all documents and requests int.MaxValue top children for dimensions A and B, |
| /// checking the returned counts against allExpectedCounts. |
| /// </summary> |
| [Test] |
| public virtual void TestBigNumResults() |
| { |
| // The readers are opened in using statements so they are released even when an assertion fails. |
| using (DirectoryReader indexReader = DirectoryReader.Open(indexDir)) |
| using (var taxoReader = new DirectoryTaxonomyReader(taxoDir)) |
| { |
| IndexSearcher searcher = NewSearcher(indexReader); |
| |
| FacetsCollector sfc = new FacetsCollector(); |
| searcher.Search(new MatchAllDocsQuery(), sfc); |
| |
| Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc); |
| |
| // Dimension A: the dimension-level value is expected to be -1. |
| FacetResult result = facets.GetTopChildren(int.MaxValue, CP_A); |
| Assert.AreEqual(-1, (int)result.Value); |
| foreach (LabelAndValue labelValue in result.LabelValues) |
| { |
| Assert.AreEqual(allExpectedCounts[CP_A + "/" + labelValue.Label].GetValueOrDefault(), labelValue.Value); |
| } |
| |
| // Dimension B: the dimension-level value must match the tracked count. |
| result = facets.GetTopChildren(int.MaxValue, CP_B); |
| Assert.AreEqual(allExpectedCounts[CP_B].GetValueOrDefault(), result.Value); |
| foreach (LabelAndValue labelValue in result.LabelValues) |
| { |
| Assert.AreEqual(allExpectedCounts[CP_B + "/" + labelValue.Label].GetValueOrDefault(), labelValue.Value); |
| } |
| } |
| } |
| |
| /// <summary> |
| /// Matches all documents and checks the counts of dimensions C and D against allExpectedCounts. |
| /// </summary> |
| [Test] |
| public virtual void TestNoParents() |
| { |
| // The readers are opened in using statements so they are released even when an assertion fails. |
| using (DirectoryReader indexReader = DirectoryReader.Open(indexDir)) |
| using (var taxoReader = new DirectoryTaxonomyReader(taxoDir)) |
| { |
| IndexSearcher searcher = NewSearcher(indexReader); |
| |
| var sfc = new FacetsCollector(); |
| searcher.Search(new MatchAllDocsQuery(), sfc); |
| |
| Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc); |
| |
| // Dimension C. |
| FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_C, CP_C); |
| Assert.AreEqual(allExpectedCounts[CP_C].GetValueOrDefault(), result.Value); |
| foreach (LabelAndValue labelValue in result.LabelValues) |
| { |
| Assert.AreEqual(allExpectedCounts[CP_C + "/" + labelValue.Label].GetValueOrDefault(), labelValue.Value); |
| } |
| |
| // Dimension D: compare against D's own expected count, not C's. |
| result = facets.GetTopChildren(NUM_CHILDREN_CP_D, CP_D); |
| Assert.AreEqual(allExpectedCounts[CP_D].GetValueOrDefault(), result.Value); |
| foreach (LabelAndValue labelValue in result.LabelValues) |
| { |
| Assert.AreEqual(allExpectedCounts[CP_D + "/" + labelValue.Label].GetValueOrDefault(), labelValue.Value); |
| } |
| } |
| } |
| } |
| |
| } |