blob: 9f2ef49fa3aa42a127754306b4fb733108c80ab8 [file] [log] [blame]
// Lucene version compatibility level 4.8.1
using Lucene.Net.Index.Extensions;
using Lucene.Net.Support.IO;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;
using Assert = Lucene.Net.TestFramework.Assert;
using Console = Lucene.Net.Util.SystemConsole;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Facet.Taxonomy
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity;
using DirectoryReader = Lucene.Net.Index.DirectoryReader;
using DirectoryTaxonomyReader = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyReader;
using DirectoryTaxonomyWriter = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using IndexSearcher = Lucene.Net.Search.IndexSearcher;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig;
using IOUtils = Lucene.Net.Util.IOUtils;
using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery;
using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
using OpenMode = Lucene.Net.Index.OpenMode;
using PerFieldSimilarityWrapper = Lucene.Net.Search.Similarities.PerFieldSimilarityWrapper;
using Query = Lucene.Net.Search.Query;
using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
using Similarity = Lucene.Net.Search.Similarities.Similarity;
using StringField = Lucene.Net.Documents.StringField;
using Term = Lucene.Net.Index.Term;
using TermQuery = Lucene.Net.Search.TermQuery;
using TestUtil = Lucene.Net.Util.TestUtil;
[TestFixture]
public class TestTaxonomyFacetCounts : FacetTestCase
{
[Test]
public virtual void TestBasic()
{
Store.Directory dir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
// Writes facet ords to a separate directory from the
// main index:
var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.SetHierarchical("Publish Date", true);
RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, dir);
Document doc = new Document();
doc.Add(new FacetField("Author", "Bob"));
doc.Add(new FacetField("Publish Date", "2010", "10", "15"));
writer.AddDocument(config.Build(taxoWriter, doc));
doc = new Document();
doc.Add(new FacetField("Author", "Lisa"));
doc.Add(new FacetField("Publish Date", "2010", "10", "20"));
writer.AddDocument(config.Build(taxoWriter, doc));
doc = new Document();
doc.Add(new FacetField("Author", "Lisa"));
doc.Add(new FacetField("Publish Date", "2012", "1", "1"));
writer.AddDocument(config.Build(taxoWriter, doc));
doc = new Document();
doc.Add(new FacetField("Author", "Susan"));
doc.Add(new FacetField("Publish Date", "2012", "1", "7"));
writer.AddDocument(config.Build(taxoWriter, doc));
doc = new Document();
doc.Add(new FacetField("Author", "Frank"));
doc.Add(new FacetField("Publish Date", "1999", "5", "5"));
writer.AddDocument(config.Build(taxoWriter, doc));
// NRT open
IndexSearcher searcher = NewSearcher(writer.GetReader());
// NRT open
var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
// Aggregate the facet counts:
FacetsCollector c = new FacetsCollector();
// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query, and use MultiCollector to
// wrap collecting the "normal" hits and also facets:
searcher.Search(new MatchAllDocsQuery(), c);
Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
// Retrieve & verify results:
Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString());
Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", facets.GetTopChildren(10, "Author").ToString());
// Now user drills down on Publish Date/2010:
DrillDownQuery q2 = new DrillDownQuery(config);
q2.Add("Publish Date", "2010");
c = new FacetsCollector();
searcher.Search(q2, c);
facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString());
Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa"));
Assert.IsNull(facets.GetTopChildren(10, "Non exitent dim"));
// Smoke test PrintTaxonomyStats:
string result;
using (ByteArrayOutputStream bos = new ByteArrayOutputStream())
{
using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8, 2048, true) { AutoFlush = true })
{
PrintTaxonomyStats.PrintStats(taxoReader, w, true);
}
result = bos.ToString();
}
Assert.IsTrue(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1);
Assert.IsTrue(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1);
// Make sure at least a few nodes of the tree came out:
Assert.IsTrue(result.IndexOf(" /1999", StringComparison.Ordinal) != -1);
Assert.IsTrue(result.IndexOf(" /2012", StringComparison.Ordinal) != -1);
Assert.IsTrue(result.IndexOf(" /20", StringComparison.Ordinal) != -1);
IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
}
// LUCENE-5333
[Test]
public virtual void TestSparseFacets()
{
Store.Directory dir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
// Writes facet ords to a separate directory from the
// main index:
var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, dir);
FacetsConfig config = new FacetsConfig();
Document doc = new Document();
doc.Add(new FacetField("a", "foo1"));
writer.AddDocument(config.Build(taxoWriter, doc));
if (Random.NextBoolean())
{
writer.Commit();
}
doc = new Document();
doc.Add(new FacetField("a", "foo2"));
doc.Add(new FacetField("b", "bar1"));
writer.AddDocument(config.Build(taxoWriter, doc));
if (Random.NextBoolean())
{
writer.Commit();
}
doc = new Document();
doc.Add(new FacetField("a", "foo3"));
doc.Add(new FacetField("b", "bar2"));
doc.Add(new FacetField("c", "baz1"));
writer.AddDocument(config.Build(taxoWriter, doc));
// NRT open
IndexSearcher searcher = NewSearcher(writer.GetReader());
// NRT open
var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
FacetsCollector c = new FacetsCollector();
searcher.Search(new MatchAllDocsQuery(), c);
Facets facets = GetTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c);
// Ask for top 10 labels for any dims that have counts:
IList<FacetResult> results = facets.GetAllDims(10);
Assert.AreEqual(3, results.Count);
Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", results[0].ToString());
Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results[1].ToString());
Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results[2].ToString());
IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
}
[Test]
public virtual void TestWrongIndexFieldName()
{
Store.Directory dir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
// Writes facet ords to a separate directory from the
// main index:
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.SetIndexFieldName("a", "$facets2");
RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, dir);
Document doc = new Document();
doc.Add(new FacetField("a", "foo1"));
writer.AddDocument(config.Build(taxoWriter, doc));
// NRT open
IndexSearcher searcher = NewSearcher(writer.GetReader());
// NRT open
var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
FacetsCollector c = new FacetsCollector();
searcher.Search(new MatchAllDocsQuery(), c);
// Uses default $facets field:
Facets facets;
if (Random.NextBoolean())
{
facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
}
else
{
OrdinalsReader ordsReader = new DocValuesOrdinalsReader();
if (Random.NextBoolean())
{
ordsReader = new CachedOrdinalsReader(ordsReader);
}
facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c);
}
// Ask for top 10 labels for any dims that have counts:
IList<FacetResult> results = facets.GetAllDims(10);
Assert.IsTrue(results.Count == 0);
try
{
facets.GetSpecificValue("a");
fail("should have hit exc");
}
catch (ArgumentException)
{
// expected
}
try
{
facets.GetTopChildren(10, "a");
fail("should have hit exc");
}
catch (ArgumentException)
{
// expected
}
IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
}
[Test]
public virtual void TestReallyNoNormsForDrillDown()
{
Store.Directory dir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
iwc.SetSimilarity(new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this));
ITaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
FacetsConfig config = new FacetsConfig();
Document doc = new Document();
doc.Add(NewTextField("field", "text", Field.Store.NO));
doc.Add(new FacetField("a", "path"));
writer.AddDocument(config.Build(taxoWriter, doc));
IOUtils.Dispose(writer, taxoWriter, dir, taxoDir);
}
private class PerFieldSimilarityWrapperAnonymousInnerClassHelper : PerFieldSimilarityWrapper
{
private readonly TestTaxonomyFacetCounts outerInstance;
public PerFieldSimilarityWrapperAnonymousInnerClassHelper(TestTaxonomyFacetCounts outerInstance)
{
this.outerInstance = outerInstance;
sim = new DefaultSimilarity();
}
private readonly Similarity sim;
public override Similarity Get(string name)
{
Assert.AreEqual("field", name);
return sim;
}
}
[Test]
public virtual void TestMultiValuedHierarchy()
{
Store.Directory dir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.SetHierarchical("a", true);
config.SetMultiValued("a", true);
RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, dir);
Document doc = new Document();
doc.Add(NewTextField("field", "text", Field.Store.NO));
doc.Add(new FacetField("a", "path", "x"));
doc.Add(new FacetField("a", "path", "y"));
writer.AddDocument(config.Build(taxoWriter, doc));
// NRT open
IndexSearcher searcher = NewSearcher(writer.GetReader());
// NRT open
var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
// Aggregate the facet counts:
FacetsCollector c = new FacetsCollector();
// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query, and use MultiCollector to
// wrap collecting the "normal" hits and also facets:
searcher.Search(new MatchAllDocsQuery(), c);
Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);
try
{
facets.GetSpecificValue("a");
fail("didn't hit expected exception");
}
catch (ArgumentException)
{
// expected
}
FacetResult result = facets.GetTopChildren(10, "a");
Assert.AreEqual(1, result.LabelValues.Length);
Assert.AreEqual(1, (int)result.LabelValues[0].Value);
IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
}
[Test]
public virtual void TestLabelWithDelimiter()
{
Store.Directory dir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, dir);
var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.SetMultiValued("dim", true);
Document doc = new Document();
doc.Add(NewTextField("field", "text", Field.Store.NO));
doc.Add(new FacetField("dim", "test\u001Fone"));
doc.Add(new FacetField("dim", "test\u001Etwo"));
writer.AddDocument(config.Build(taxoWriter, doc));
// NRT open
IndexSearcher searcher = NewSearcher(writer.GetReader());
// NRT open
var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
FacetsCollector c = new FacetsCollector();
searcher.Search(new MatchAllDocsQuery(), c);
Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);
Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone"));
Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo"));
FacetResult result = facets.GetTopChildren(10, "dim");
Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n test\u001Fone (1)\n test\u001Etwo (1)\n", result.ToString());
IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
}
[Test]
public virtual void TestRequireDimCount()
{
Store.Directory dir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, dir);
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.SetRequireDimCount("dim", true);
config.SetMultiValued("dim2", true);
config.SetRequireDimCount("dim2", true);
config.SetMultiValued("dim3", true);
config.SetHierarchical("dim3", true);
config.SetRequireDimCount("dim3", true);
Document doc = new Document();
doc.Add(NewTextField("field", "text", Field.Store.NO));
doc.Add(new FacetField("dim", "a"));
doc.Add(new FacetField("dim2", "a"));
doc.Add(new FacetField("dim2", "b"));
doc.Add(new FacetField("dim3", "a", "b"));
doc.Add(new FacetField("dim3", "a", "c"));
writer.AddDocument(config.Build(taxoWriter, doc));
// NRT open
IndexSearcher searcher = NewSearcher(writer.GetReader());
// NRT open
var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
FacetsCollector c = new FacetsCollector();
searcher.Search(new MatchAllDocsQuery(), c);
Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);
Assert.AreEqual(1, facets.GetTopChildren(10, "dim").Value);
Assert.AreEqual(1, facets.GetTopChildren(10, "dim2").Value);
Assert.AreEqual(1, facets.GetTopChildren(10, "dim3").Value);
try
{
Assert.AreEqual(1, facets.GetSpecificValue("dim"));
fail("didn't hit expected exception");
}
catch (ArgumentException)
{
// expected
}
Assert.AreEqual(1, facets.GetSpecificValue("dim2"));
Assert.AreEqual(1, facets.GetSpecificValue("dim3"));
IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
}
// LUCENE-4583: make sure if we require > 32 KB for one
// document, we don't hit exc when using Facet42DocValuesFormat
[Test]
public virtual void TestManyFacetsInOneDocument()
{
AssumeTrue("default Codec doesn't support huge BinaryDocValues", TestUtil.FieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
Store.Directory dir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.SetMultiValued("dim", true);
int numLabels = TestUtil.NextInt32(Random, 40000, 100000);
Document doc = new Document();
doc.Add(NewTextField("field", "text", Field.Store.NO));
for (int i = 0; i < numLabels; i++)
{
doc.Add(new FacetField("dim", "" + i));
}
writer.AddDocument(config.Build(taxoWriter, doc));
// NRT open
IndexSearcher searcher = NewSearcher(writer.GetReader());
// NRT open
var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
// Aggregate the facet counts:
FacetsCollector c = new FacetsCollector();
// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query, and use MultiCollector to
// wrap collecting the "normal" hits and also facets:
searcher.Search(new MatchAllDocsQuery(), c);
Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);
FacetResult result = facets.GetTopChildren(int.MaxValue, "dim");
Assert.AreEqual(numLabels, result.LabelValues.Length);
var allLabels = new JCG.HashSet<string>();
foreach (LabelAndValue labelValue in result.LabelValues)
{
allLabels.Add(labelValue.Label);
Assert.AreEqual(1, (int)labelValue.Value);
}
Assert.AreEqual(numLabels, allLabels.Count);
IOUtils.Dispose(searcher.IndexReader, taxoWriter, writer, taxoReader, dir, taxoDir);
}
// Make sure we catch when app didn't declare field as
// hierarchical but it was:
[Test]
public virtual void TestDetectHierarchicalField()
{
Store.Directory dir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, dir);
FacetsConfig config = new FacetsConfig();
Document doc = new Document();
doc.Add(NewTextField("field", "text", Field.Store.NO));
doc.Add(new FacetField("a", "path", "other"));
try
{
config.Build(taxoWriter, doc);
fail("did not hit expected exception");
}
catch (ArgumentException)
{
// expected
}
IOUtils.Dispose(writer, taxoWriter, dir, taxoDir);
}
// Make sure we catch when app didn't declare field as
// multi-valued but it was:
[Test]
public virtual void TestDetectMultiValuedField()
{
Store.Directory dir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
ITaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, dir);
FacetsConfig config = new FacetsConfig();
Document doc = new Document();
doc.Add(NewTextField("field", "text", Field.Store.NO));
doc.Add(new FacetField("a", "path"));
doc.Add(new FacetField("a", "path2"));
try
{
config.Build(taxoWriter, doc);
fail("did not hit expected exception");
}
catch (ArgumentException)
{
// expected
}
IOUtils.Dispose(writer, taxoWriter, dir, taxoDir);
}
[Test]
public virtual void TestSeparateIndexedFields()
{
Store.Directory indexDir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
FacetsConfig config = new FacetsConfig();
config.SetIndexFieldName("b", "$b");
for (int i = AtLeast(30); i > 0; --i)
{
Document doc = new Document();
doc.Add(new StringField("f", "v", Field.Store.NO));
doc.Add(new FacetField("a", "1"));
doc.Add(new FacetField("b", "1"));
iw.AddDocument(config.Build(taxoWriter, doc));
}
DirectoryReader r = DirectoryReader.Open(iw, true);
var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
FacetsCollector sfc = new FacetsCollector();
NewSearcher(r).Search(new MatchAllDocsQuery(), sfc);
Facets facets1 = GetTaxonomyFacetCounts(taxoReader, config, sfc);
Facets facets2 = GetTaxonomyFacetCounts(taxoReader, config, sfc, "$b");
Assert.AreEqual(r.MaxDoc, (int)facets1.GetTopChildren(10, "a").Value);
Assert.AreEqual(r.MaxDoc, (int)facets2.GetTopChildren(10, "b").Value);
IOUtils.Dispose(taxoWriter, iw, taxoReader, taxoDir, r, indexDir);
}
[Test]
public virtual void TestCountRoot()
{
// LUCENE-4882: FacetsAccumulator threw NPE if a FacetRequest was defined on CP.EMPTY
Store.Directory indexDir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
FacetsConfig config = new FacetsConfig();
for (int i = AtLeast(30); i > 0; --i)
{
Document doc = new Document();
doc.Add(new FacetField("a", "1"));
doc.Add(new FacetField("b", "1"));
iw.AddDocument(config.Build(taxoWriter, doc));
}
DirectoryReader r = DirectoryReader.Open(iw, true);
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
FacetsCollector sfc = new FacetsCollector();
NewSearcher(r).Search(new MatchAllDocsQuery(), sfc);
Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
foreach (FacetResult result in facets.GetAllDims(10))
{
Assert.AreEqual(r.NumDocs, (int)result.Value);
}
IOUtils.Dispose(taxoWriter, iw, taxoReader, taxoDir, r, indexDir);
}
[Test]
public virtual void TestGetFacetResultsTwice()
{
// LUCENE-4893: counts were multiplied as many times as getFacetResults was called.
Store.Directory indexDir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
FacetsConfig config = new FacetsConfig();
Document doc = new Document();
doc.Add(new FacetField("a", "1"));
doc.Add(new FacetField("b", "1"));
iw.AddDocument(config.Build(taxoWriter, doc));
DirectoryReader r = DirectoryReader.Open(iw, true);
var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
FacetsCollector sfc = new FacetsCollector();
NewSearcher(r).Search(new MatchAllDocsQuery(), sfc);
Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
IList<FacetResult> res1 = facets.GetAllDims(10);
IList<FacetResult> res2 = facets.GetAllDims(10);
Assert.AreEqual(res1, res2, aggressive: false, "calling getFacetResults twice should return the .equals()=true result");
IOUtils.Dispose(taxoWriter, iw, taxoReader, taxoDir, r, indexDir);
}
[Test]
public virtual void TestChildCount()
{
// LUCENE-4885: FacetResult.numValidDescendants was not set properly by FacetsAccumulator
var indexDir = NewDirectory();
var taxoDir = NewDirectory();
var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
FacetsConfig config = new FacetsConfig();
for (int i = 0; i < 10; i++)
{
Document doc = new Document();
doc.Add(new FacetField("a", Convert.ToString(i, CultureInfo.InvariantCulture)));
iw.AddDocument(config.Build(taxoWriter, doc));
}
DirectoryReader r = DirectoryReader.Open(iw, true);
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
FacetsCollector sfc = new FacetsCollector();
NewSearcher(r).Search(new MatchAllDocsQuery(), sfc);
Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
Assert.AreEqual(10, facets.GetTopChildren(2, "a").ChildCount);
IOUtils.Dispose(taxoWriter, iw, taxoReader, taxoDir, r, indexDir);
}
private void indexTwoDocs(ITaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, bool withContent)
{
for (int i = 0; i < 2; i++)
{
Document doc = new Document();
if (withContent)
{
doc.Add(new StringField("f", "a", Field.Store.NO));
}
if (config != null)
{
doc.Add(new FacetField("A", Convert.ToString(i, CultureInfo.InvariantCulture)));
indexWriter.AddDocument(config.Build(taxoWriter, doc));
}
else
{
indexWriter.AddDocument(doc);
}
}
indexWriter.Commit();
}
[Test]
public virtual void TestSegmentsWithoutCategoriesOrResults()
{
// tests the accumulator when there are segments with no results
var indexDir = NewDirectory();
var taxoDir = NewDirectory();
IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
//iwc.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges
IndexWriter indexWriter = new IndexWriter(indexDir, iwc);
var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetsConfig config = new FacetsConfig();
indexTwoDocs(taxoWriter, indexWriter, config, false); // 1st segment, no content, with categories
indexTwoDocs(taxoWriter, indexWriter, null, true); // 2nd segment, with content, no categories
indexTwoDocs(taxoWriter, indexWriter, config, true); // 3rd segment ok
indexTwoDocs(taxoWriter, indexWriter, null, false); // 4th segment, no content, or categories
indexTwoDocs(taxoWriter, indexWriter, null, true); // 5th segment, with content, no categories
indexTwoDocs(taxoWriter, indexWriter, config, true); // 6th segment, with content, with categories
indexTwoDocs(taxoWriter, indexWriter, null, true); // 7th segment, with content, no categories
IOUtils.Dispose(indexWriter, taxoWriter);
DirectoryReader indexReader = DirectoryReader.Open(indexDir);
var taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher indexSearcher = NewSearcher(indexReader);
// search for "f:a", only segments 1 and 3 should match results
Query q = new TermQuery(new Term("f", "a"));
FacetsCollector sfc = new FacetsCollector();
indexSearcher.Search(q, sfc);
Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
FacetResult result = facets.GetTopChildren(10, "A");
Assert.AreEqual(2, result.LabelValues.Length, "wrong number of children");
foreach (LabelAndValue labelValue in result.LabelValues)
{
Assert.AreEqual(2, (int)labelValue.Value, "wrong weight for child " + labelValue.Label);
}
IOUtils.Dispose(indexReader, taxoReader, indexDir, taxoDir);
}
[Test]
public virtual void TestRandom()
{
string[] tokens = GetRandomTokens(10);
Store.Directory indexDir = NewDirectory();
Store.Directory taxoDir = NewDirectory();
RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, indexDir);
var tw = new DirectoryTaxonomyWriter(taxoDir);
FacetsConfig config = new FacetsConfig();
int numDocs = AtLeast(1000);
int numDims = TestUtil.NextInt32(Random, 1, 7);
IList<TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);
foreach (TestDoc testDoc in testDocs)
{
Document doc = new Document();
doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
for (int j = 0; j < numDims; j++)
{
if (testDoc.dims[j] != null)
{
doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
}
}
w.AddDocument(config.Build(tw, doc));
}
// NRT open
IndexSearcher searcher = NewSearcher(w.GetReader());
// NRT open
var tr = new DirectoryTaxonomyReader(tw);
int iters = AtLeast(100);
for (int iter = 0; iter < iters; iter++)
{
string searchToken = tokens[Random.Next(tokens.Length)];
if (Verbose)
{
Console.WriteLine("\nTEST: iter content=" + searchToken);
}
FacetsCollector fc = new FacetsCollector();
FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
Facets facets = GetTaxonomyFacetCounts(tr, config, fc);
// Slow, yet hopefully bug-free, faceting:
var expectedCounts = new List<Dictionary<string, int?>>();
for (int i = 0; i < numDims; i++)
{
expectedCounts.Add(new Dictionary<string, int?>());
}
foreach (TestDoc doc in testDocs)
{
if (doc.content.Equals(searchToken, StringComparison.Ordinal))
{
for (int j = 0; j < numDims; j++)
{
if (doc.dims[j] != null)
{
if (!expectedCounts[j].TryGetValue(doc.dims[j], out int? v) || v == null)
{
expectedCounts[j][doc.dims[j]] = 1;
}
else
{
expectedCounts[j][doc.dims[j]] = (int)v + 1;
}
}
}
}
}
List<FacetResult> expected = new List<FacetResult>();
for (int i = 0; i < numDims; i++)
{
List<LabelAndValue> labelValues = new List<LabelAndValue>();
int totCount = 0;
foreach (KeyValuePair<string, int?> ent in expectedCounts[i])
{
labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
totCount += ent.Value.Value;
}
SortLabelValues(labelValues);
if (totCount > 0)
{
expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count));
}
}
// Sort by highest value, tie break by value:
SortFacetResults(expected);
IList<FacetResult> actual = facets.GetAllDims(10);
// Messy: fixup ties
SortTies(actual);
Assert.AreEqual(expected, actual, aggressive: false);
}
IOUtils.Dispose(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
}
}
}