blob: bf011b4283513c52ab5876ebb81272261a2d553b [file] [log] [blame]
package org.apache.lucene.facet.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.params.PerDimensionIndexingParams;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.facet.range.RangeAccumulator;
import org.apache.lucene.facet.range.RangeFacetRequest;
import org.apache.lucene.facet.sampling.RandomSampler;
import org.apache.lucene.facet.sampling.Sampler;
import org.apache.lucene.facet.sampling.SamplingAccumulator;
import org.apache.lucene.facet.sampling.SamplingParams;
import org.apache.lucene.facet.sampling.SamplingWrapper;
import org.apache.lucene.facet.sampling.TakmiSampleFixer;
import org.apache.lucene.facet.search.FacetRequest.ResultMode;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesAccumulator;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestFacetsCollector extends FacetTestCase {
@Test
public void testSumScoreAggregator() throws Exception {
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FacetFields facetFields = new FacetFields(taxonomyWriter);
for(int i = atLeast(30); i > 0; --i) {
Document doc = new Document();
if (random().nextBoolean()) { // don't match all documents
doc.add(new StringField("f", "v", Store.NO));
}
facetFields.addFields(doc, Collections.singletonList(new CategoryPath("a")));
iw.addDocument(doc);
}
taxonomyWriter.close();
iw.close();
DirectoryReader r = DirectoryReader.open(indexDir);
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
FacetSearchParams sParams = new FacetSearchParams(new SumScoreFacetRequest(new CategoryPath("a"), 10));
FacetsAccumulator fa = new FacetsAccumulator(sParams, r, taxo) {
@Override
public FacetsAggregator getAggregator() {
return new SumScoreFacetsAggregator();
}
};
FacetsCollector fc = FacetsCollector.create(fa);
TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false);
ConstantScoreQuery csq = new ConstantScoreQuery(new MatchAllDocsQuery());
csq.setBoost(2.0f);
newSearcher(r).search(csq, MultiCollector.wrap(fc, topDocs));
List<FacetResult> res = fc.getFacetResults();
float value = (float) res.get(0).getFacetResultNode().value;
TopDocs td = topDocs.topDocs();
int expected = (int) (td.getMaxScore() * td.totalHits);
assertEquals(expected, (int) value);
IOUtils.close(taxo, taxoDir, r, indexDir);
}
@Test
public void testMultiCountingLists() throws Exception {
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FacetIndexingParams fip = new PerDimensionIndexingParams(Collections.singletonMap(new CategoryPath("b"), new CategoryListParams("$b")));
FacetFields facetFields = new FacetFields(taxonomyWriter, fip);
for(int i = atLeast(30); i > 0; --i) {
Document doc = new Document();
doc.add(new StringField("f", "v", Store.NO));
List<CategoryPath> cats = new ArrayList<CategoryPath>();
cats.add(new CategoryPath("a"));
cats.add(new CategoryPath("b"));
facetFields.addFields(doc, cats);
iw.addDocument(doc);
}
taxonomyWriter.close();
iw.close();
DirectoryReader r = DirectoryReader.open(indexDir);
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
FacetSearchParams sParams = new FacetSearchParams(fip,
new CountFacetRequest(new CategoryPath("a"), 10),
new CountFacetRequest(new CategoryPath("b"), 10));
FacetsCollector fc = FacetsCollector.create(sParams, r, taxo);
newSearcher(r).search(new MatchAllDocsQuery(), fc);
for (FacetResult res : fc.getFacetResults()) {
assertEquals("unexpected count for " + res, r.maxDoc(), (int) res.getFacetResultNode().value);
}
IOUtils.close(taxo, taxoDir, r, indexDir);
}
@Test
public void testCountAndSumScore() throws Exception {
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FacetIndexingParams fip = new PerDimensionIndexingParams(Collections.singletonMap(new CategoryPath("b"), new CategoryListParams("$b")));
FacetFields facetFields = new FacetFields(taxonomyWriter, fip);
for(int i = atLeast(30); i > 0; --i) {
Document doc = new Document();
doc.add(new StringField("f", "v", Store.NO));
List<CategoryPath> cats = new ArrayList<CategoryPath>();
cats.add(new CategoryPath("a"));
cats.add(new CategoryPath("b"));
facetFields.addFields(doc, cats);
iw.addDocument(doc);
}
taxonomyWriter.close();
iw.close();
DirectoryReader r = DirectoryReader.open(indexDir);
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
FacetSearchParams sParams = new FacetSearchParams(fip,
new CountFacetRequest(new CategoryPath("a"), 10),
new SumScoreFacetRequest(new CategoryPath("b"), 10));
Map<CategoryListParams,FacetsAggregator> aggregators = new HashMap<CategoryListParams,FacetsAggregator>();
aggregators.put(fip.getCategoryListParams(new CategoryPath("a")), new FastCountingFacetsAggregator());
aggregators.put(fip.getCategoryListParams(new CategoryPath("b")), new SumScoreFacetsAggregator());
final FacetsAggregator aggregator = new PerCategoryListAggregator(aggregators, fip);
FacetsAccumulator fa = new FacetsAccumulator(sParams, r, taxo) {
@Override
public FacetsAggregator getAggregator() {
return aggregator;
}
};
FacetsCollector fc = FacetsCollector.create(fa);
TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false);
newSearcher(r).search(new MatchAllDocsQuery(), MultiCollector.wrap(fc, topDocs));
List<FacetResult> facetResults = fc.getFacetResults();
FacetResult fresA = facetResults.get(0);
assertEquals("unexpected count for " + fresA, r.maxDoc(), (int) fresA.getFacetResultNode().value);
FacetResult fresB = facetResults.get(1);
double expected = topDocs.topDocs().getMaxScore() * r.numDocs();
assertEquals("unexpected value for " + fresB, expected, fresB.getFacetResultNode().value, 1E-10);
IOUtils.close(taxo, taxoDir, r, indexDir);
}
@Test
public void testCountRoot() throws Exception {
// LUCENE-4882: FacetsAccumulator threw NPE if a FacetRequest was defined on CP.EMPTY
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FacetFields facetFields = new FacetFields(taxonomyWriter);
for(int i = atLeast(30); i > 0; --i) {
Document doc = new Document();
facetFields.addFields(doc, Arrays.asList(new CategoryPath("a"), new CategoryPath("b")));
iw.addDocument(doc);
}
taxonomyWriter.close();
iw.close();
DirectoryReader r = DirectoryReader.open(indexDir);
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CategoryPath.EMPTY, 10));
final FacetsAccumulator fa = random().nextBoolean() ? new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo);
FacetsCollector fc = FacetsCollector.create(fa);
newSearcher(r).search(new MatchAllDocsQuery(), fc);
FacetResult res = fc.getFacetResults().get(0);
for (FacetResultNode node : res.getFacetResultNode().subResults) {
assertEquals(r.numDocs(), (int) node.value);
}
IOUtils.close(taxo, taxoDir, r, indexDir);
}
@Test
public void testGetFacetResultsTwice() throws Exception {
// LUCENE-4893: counts were multiplied as many times as getFacetResults was called.
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FacetFields facetFields = new FacetFields(taxonomyWriter);
Document doc = new Document();
facetFields.addFields(doc, Arrays.asList(new CategoryPath("a/1", '/'), new CategoryPath("b/1", '/')));
iw.addDocument(doc);
taxonomyWriter.close();
iw.close();
DirectoryReader r = DirectoryReader.open(indexDir);
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
FacetSearchParams fsp = new FacetSearchParams(
new CountFacetRequest(new CategoryPath("a"), 10),
new CountFacetRequest(new CategoryPath("b"), 10));
final FacetsAccumulator fa = random().nextBoolean() ? new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo);
final FacetsCollector fc = FacetsCollector.create(fa);
newSearcher(r).search(new MatchAllDocsQuery(), fc);
List<FacetResult> res1 = fc.getFacetResults();
List<FacetResult> res2 = fc.getFacetResults();
assertSame("calling getFacetResults twice should return the exact same result", res1, res2);
IOUtils.close(taxo, taxoDir, r, indexDir);
}
@Test
public void testReset() throws Exception {
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FacetFields facetFields = new FacetFields(taxonomyWriter);
Document doc = new Document();
facetFields.addFields(doc, Arrays.asList(new CategoryPath("a/1", '/'), new CategoryPath("b/1", '/')));
iw.addDocument(doc);
taxonomyWriter.close();
iw.close();
DirectoryReader r = DirectoryReader.open(indexDir);
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
FacetSearchParams fsp = new FacetSearchParams(
new CountFacetRequest(new CategoryPath("a"), 10),
new CountFacetRequest(new CategoryPath("b"), 10));
final FacetsAccumulator fa = random().nextBoolean() ? new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo);
final FacetsCollector fc = FacetsCollector.create(fa);
// this should populate the cached results, but doing search should clear the cache
fc.getFacetResults();
newSearcher(r).search(new MatchAllDocsQuery(), fc);
List<FacetResult> res1 = fc.getFacetResults();
// verify that we didn't get the cached result
assertEquals(2, res1.size());
for (FacetResult res : res1) {
assertEquals(1, res.getFacetResultNode().subResults.size());
assertEquals(1, (int) res.getFacetResultNode().subResults.get(0).value);
}
fc.reset();
List<FacetResult> res2 = fc.getFacetResults();
assertNotSame("reset() should clear the cached results", res1, res2);
IOUtils.close(taxo, taxoDir, r, indexDir);
}
@Test
public void testParentOrdinal() throws Exception {
// LUCENE-4913: root ordinal was always 0 when all children were requested
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FacetFields facetFields = new FacetFields(taxonomyWriter);
Document doc = new Document();
facetFields.addFields(doc, Arrays.asList(new CategoryPath("a/1", '/')));
iw.addDocument(doc);
taxonomyWriter.close();
iw.close();
DirectoryReader r = DirectoryReader.open(indexDir);
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
// assert IntFacetResultHandler
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("a"), 10));
FacetsAccumulator fa = random().nextBoolean() ? new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo);
FacetsCollector fc = FacetsCollector.create(fa);
newSearcher(r).search(new MatchAllDocsQuery(), fc);
assertTrue("invalid ordinal for child node: 0", 0 != fc.getFacetResults().get(0).getFacetResultNode().subResults.get(0).ordinal);
// assert IntFacetResultHandler
fsp = new FacetSearchParams(new SumScoreFacetRequest(new CategoryPath("a"), 10));
if (random().nextBoolean()) {
fa = new FacetsAccumulator(fsp, r, taxo) {
@Override
public FacetsAggregator getAggregator() {
return new SumScoreFacetsAggregator();
}
};
} else {
fa = new StandardFacetsAccumulator(fsp, r, taxo);
}
fc = FacetsCollector.create(fa);
newSearcher(r).search(new MatchAllDocsQuery(), fc);
assertTrue("invalid ordinal for child node: 0", 0 != fc.getFacetResults().get(0).getFacetResultNode().subResults.get(0).ordinal);
IOUtils.close(taxo, taxoDir, r, indexDir);
}
@Test
public void testNumValidDescendants() throws Exception {
// LUCENE-4885: FacetResult.numValidDescendants was not set properly by FacetsAccumulator
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FacetFields facetFields = new FacetFields(taxonomyWriter);
for (int i = 0; i < 10; i++) {
Document doc = new Document();
facetFields.addFields(doc, Arrays.asList(new CategoryPath("a", Integer.toString(i))));
iw.addDocument(doc);
}
taxonomyWriter.close();
iw.close();
DirectoryReader r = DirectoryReader.open(indexDir);
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
CountFacetRequest cfr = new CountFacetRequest(new CategoryPath("a"), 2);
cfr.setResultMode(random().nextBoolean() ? ResultMode.GLOBAL_FLAT : ResultMode.PER_NODE_IN_TREE);
FacetSearchParams fsp = new FacetSearchParams(cfr);
final FacetsAccumulator fa = random().nextBoolean() ? new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo);
FacetsCollector fc = FacetsCollector.create(fa);
newSearcher(r).search(new MatchAllDocsQuery(), fc);
FacetResult res = fc.getFacetResults().get(0);
assertEquals(10, res.getNumValidDescendants());
IOUtils.close(taxo, taxoDir, r, indexDir);
}
@Test
public void testLabeling() throws Exception {
Directory indexDir = newDirectory(), taxoDir = newDirectory();
// create the index
IndexWriter indexWriter = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetFields facetFields = new FacetFields(taxoWriter);
Document doc = new Document();
facetFields.addFields(doc, Arrays.asList(new CategoryPath("A/1", '/')));
indexWriter.addDocument(doc);
IOUtils.close(indexWriter, taxoWriter);
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
// ask to count a non-existing category to test labeling
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("B"), 5));
final SamplingParams sampleParams = new SamplingParams();
sampleParams.setMaxSampleSize(100);
sampleParams.setMinSampleSize(100);
sampleParams.setSamplingThreshold(100);
sampleParams.setOversampleFactor(1.0d);
if (random().nextBoolean()) {
sampleParams.setSampleFixer(new TakmiSampleFixer(indexReader, taxoReader, fsp));
}
final Sampler sampler = new RandomSampler(sampleParams, random());
FacetsAccumulator[] accumulators = new FacetsAccumulator[] {
new FacetsAccumulator(fsp, indexReader, taxoReader),
new StandardFacetsAccumulator(fsp, indexReader, taxoReader),
new SamplingAccumulator(sampler, fsp, indexReader, taxoReader),
new AdaptiveFacetsAccumulator(fsp, indexReader, taxoReader),
new SamplingWrapper(new StandardFacetsAccumulator(fsp, indexReader, taxoReader), sampler)
};
for (FacetsAccumulator fa : accumulators) {
FacetsCollector fc = FacetsCollector.create(fa);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertNotNull(facetResults);
assertEquals("incorrect label returned for " + fa, fsp.facetRequests.get(0).categoryPath, facetResults.get(0).getFacetResultNode().label);
}
try {
// SortedSetDocValuesAccumulator cannot even be created in such state
assertNull(new SortedSetDocValuesAccumulator(fsp, new SortedSetDocValuesReaderState(indexReader)));
// if this ever changes, make sure FacetResultNode is labeled correctly
fail("should not have succeeded to execute a request over a category which wasn't indexed as SortedSetDVField");
} catch (IllegalArgumentException e) {
// expected
}
fsp = new FacetSearchParams(new RangeFacetRequest<LongRange>("f", new LongRange("grr", 0, true, 1, true)));
RangeAccumulator ra = new RangeAccumulator(fsp, indexReader);
FacetsCollector fc = FacetsCollector.create(ra);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertNotNull(facetResults);
assertEquals("incorrect label returned for RangeAccumulator", fsp.facetRequests.get(0).categoryPath, facetResults.get(0).getFacetResultNode().label);
IOUtils.close(indexReader, taxoReader);
IOUtils.close(indexDir, taxoDir);
}
}