/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.facet.taxonomy;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
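
/**
 * Tests taxonomy facet counting over an index whose segments mix documents with and
 * without facets, and with and without the term that the test queries match.
 */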
public class TestTaxonomyFacetCounts2 extends FacetTestCase {
  private static final Term A = new Term("f", "a");
  private static final String CP_A = "A", CP_B = "B";
  private static final String CP_C = "C", CP_D = "D"; // indexed w/ NO_PARENTS
  private static final int NUM_CHILDREN_CP_A = 5, NUM_CHILDREN_CP_B = 3;
  private static final int NUM_CHILDREN_CP_C = 5, NUM_CHILDREN_CP_D = 5;
  private static final FacetField[] CATEGORIES_A, CATEGORIES_B;
  private static final FacetField[] CATEGORIES_C, CATEGORIES_D;

  static {
    CATEGORIES_A = new FacetField[NUM_CHILDREN_CP_A];
    for (int i = 0; i < NUM_CHILDREN_CP_A; i++) {
      CATEGORIES_A[i] = new FacetField(CP_A, Integer.toString(i));
    }
    CATEGORIES_B = new FacetField[NUM_CHILDREN_CP_B];
    for (int i = 0; i < NUM_CHILDREN_CP_B; i++) {
      CATEGORIES_B[i] = new FacetField(CP_B, Integer.toString(i));
    }
    // NO_PARENTS categories
    CATEGORIES_C = new FacetField[NUM_CHILDREN_CP_C];
    for (int i = 0; i < NUM_CHILDREN_CP_C; i++) {
      CATEGORIES_C[i] = new FacetField(CP_C, Integer.toString(i));
    }
    // Multi-level categories
    CATEGORIES_D = new FacetField[NUM_CHILDREN_CP_D];
    for (int i = 0; i < NUM_CHILDREN_CP_D; i++) {
      String val = Integer.toString(i);
      CATEGORIES_D[i] = new FacetField(CP_D, val, val + val); // e.g. D/1/11, D/2/22...
    }
  }

  private static Directory indexDir, taxoDir;
  private static Map<String,Integer> allExpectedCounts, termExpectedCounts;

  @AfterClass
  public static void afterClassCountingFacetsAggregatorTest() throws Exception {
    IOUtils.close(indexDir, taxoDir);
    indexDir = taxoDir = null;
  }

  private static List<FacetField> randomCategories(Random random) {
    // add random categories from the two dimensions, ensuring that the same
    // category is not added twice.
    int numFacetsA = random.nextInt(3) + 1; // 1-3
    int numFacetsB = random.nextInt(2) + 1; // 1-2
    ArrayList<FacetField> categories_a = new ArrayList<>();
    categories_a.addAll(Arrays.asList(CATEGORIES_A));
    ArrayList<FacetField> categories_b = new ArrayList<>();
    categories_b.addAll(Arrays.asList(CATEGORIES_B));
    Collections.shuffle(categories_a, random);
    Collections.shuffle(categories_b, random);
    ArrayList<FacetField> categories = new ArrayList<>();
    categories.addAll(categories_a.subList(0, numFacetsA));
    categories.addAll(categories_b.subList(0, numFacetsB));
    // add the NO_PARENTS categories
    categories.add(CATEGORIES_C[random.nextInt(NUM_CHILDREN_CP_C)]);
    categories.add(CATEGORIES_D[random.nextInt(NUM_CHILDREN_CP_D)]);
    return categories;
  }

  private static void addField(Document doc) {
    doc.add(new StringField(A.field(), A.text(), Store.NO));
  }

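  // adds a random set of categories to the document and updates the expected per-category
  // counts; term-filtered counts are only updated when the doc will also carry the term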
  private static void addFacets(Document doc, FacetsConfig config, boolean updateTermExpectedCounts)
      throws IOException {
    List<FacetField> docCategories = randomCategories(random());
    for (FacetField ff : docCategories) {
      doc.add(ff);
      String cp = ff.dim + "/" + ff.path[0];
      allExpectedCounts.put(cp, allExpectedCounts.get(cp) + 1);
      if (updateTermExpectedCounts) {
        termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1);
      }
    }
    // each document contributes exactly one count to the dimension totals of B
    // (requireDimCount) and of C and D
    allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1);
    allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1);
    allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1);
    if (updateTermExpectedCounts) {
      termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1);
      termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1);
      termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1);
    }
  }

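  // A and B hold multiple values per document; B additionally indexes its dimension
  // count (requireDimCount); D is a two-level hierarchical dimension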
  private static FacetsConfig getConfig() {
    FacetsConfig config = new FacetsConfig();
    config.setMultiValued("A", true);
    config.setMultiValued("B", true);
    config.setRequireDimCount("B", true);
    config.setHierarchical("D", true);
    return config;
  }

  private static void indexDocsNoFacets(IndexWriter indexWriter) throws IOException {
    int numDocs = atLeast(2);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      addField(doc);
      indexWriter.addDocument(doc);
    }
    indexWriter.commit(); // flush a segment
  }

  private static void indexDocsWithFacetsNoTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
      Map<String,Integer> expectedCounts) throws IOException {
    Random random = random();
    int numDocs = atLeast(random, 2);
    FacetsConfig config = getConfig();
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      addFacets(doc, config, false);
      indexWriter.addDocument(config.build(taxoWriter, doc));
    }
    indexWriter.commit(); // flush a segment
  }

  private static void indexDocsWithFacetsAndTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
      Map<String,Integer> expectedCounts) throws IOException {
    Random random = random();
    int numDocs = atLeast(random, 2);
    FacetsConfig config = getConfig();
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      addFacets(doc, config, true);
      addField(doc);
      indexWriter.addDocument(config.build(taxoWriter, doc));
    }
    indexWriter.commit(); // flush a segment
  }

  private static void indexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
      Map<String,Integer> expectedCounts) throws IOException {
    Random random = random();
    int numDocs = atLeast(random, 2);
    FacetsConfig config = getConfig();
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      boolean hasContent = random.nextBoolean();
      if (hasContent) {
        addField(doc);
      }
      addFacets(doc, config, hasContent);
      indexWriter.addDocument(config.build(taxoWriter, doc));
    }
    indexWriter.commit(); // flush a segment
  }

  // initialize expectedCounts w/ 0 for all categories
  private static Map<String,Integer> newCounts() {
    Map<String,Integer> counts = new HashMap<>();
    counts.put(CP_A, 0);
    counts.put(CP_B, 0);
    counts.put(CP_C, 0);
    counts.put(CP_D, 0);
    for (FacetField ff : CATEGORIES_A) {
      counts.put(ff.dim + "/" + ff.path[0], 0);
    }
    for (FacetField ff : CATEGORIES_B) {
      counts.put(ff.dim + "/" + ff.path[0], 0);
    }
    for (FacetField ff : CATEGORIES_C) {
      counts.put(ff.dim + "/" + ff.path[0], 0);
    }
    for (FacetField ff : CATEGORIES_D) {
      counts.put(ff.dim + "/" + ff.path[0], 0);
    }
    return counts;
  }

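  // builds one index, shared by all tests, whose segments cover the facet/term
  // combinations listed below, and records the expected counts while indexing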
  @BeforeClass
  public static void beforeClassCountingFacetsAggregatorTest() throws Exception {
    indexDir = newDirectory();
    taxoDir = newDirectory();

    // create an index which has:
    // 1. Segment with no categories, but matching results
    // 2. Segment w/ categories, but no results
    // 3. Segment w/ categories and results
    // 4. Segment w/ categories, but only some results
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    conf.setMergePolicy(NoMergePolicy.INSTANCE); // prevent merges, so we can control the index segments
    IndexWriter indexWriter = new IndexWriter(indexDir, conf);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    allExpectedCounts = newCounts();
    termExpectedCounts = newCounts();

    // segment w/ no categories
    indexDocsNoFacets(indexWriter);
    // segment w/ categories, no content
    indexDocsWithFacetsNoTerms(indexWriter, taxoWriter, allExpectedCounts);
    // segment w/ categories and content
    indexDocsWithFacetsAndTerms(indexWriter, taxoWriter, allExpectedCounts);
    // segment w/ categories and some content
    indexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts);

    indexWriter.close();
    IOUtils.close(taxoWriter);
  }

  @Test
  public void testDifferentNumResults() throws Exception {
    // test the collector w/ FacetRequests and different numResults
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
    FacetsCollector sfc = new FacetsCollector();
    TermQuery q = new TermQuery(A);
    searcher.search(q, sfc);
    Facets facets = getTaxonomyFacetCounts(taxoReader, getConfig(), sfc);

    FacetResult result = facets.getTopChildren(NUM_CHILDREN_CP_A, CP_A);
    assertEquals(-1, result.value.intValue());
    for (LabelAndValue labelValue : result.labelValues) {
      assertEquals(termExpectedCounts.get(CP_A + "/" + labelValue.label), labelValue.value);
    }
    result = facets.getTopChildren(NUM_CHILDREN_CP_B, CP_B);
    assertEquals(termExpectedCounts.get(CP_B), result.value);
    for (LabelAndValue labelValue : result.labelValues) {
      assertEquals(termExpectedCounts.get(CP_B + "/" + labelValue.label), labelValue.value);
    }

    IOUtils.close(indexReader, taxoReader);
  }

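  // all documents match; also verifies that children are returned in descending count order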
  @Test
  public void testAllCounts() throws Exception {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
    FacetsCollector sfc = new FacetsCollector();
    searcher.search(new MatchAllDocsQuery(), sfc);
    Facets facets = getTaxonomyFacetCounts(taxoReader, getConfig(), sfc);

    FacetResult result = facets.getTopChildren(NUM_CHILDREN_CP_A, CP_A);
    assertEquals(-1, result.value.intValue());
    int prevValue = Integer.MAX_VALUE;
    for (LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_A + "/" + labelValue.label), labelValue.value);
      assertTrue("wrong sort order of sub results: labelValue.value=" + labelValue.value
          + " prevValue=" + prevValue, labelValue.value.intValue() <= prevValue);
      prevValue = labelValue.value.intValue();
    }

    result = facets.getTopChildren(NUM_CHILDREN_CP_B, CP_B);
    assertEquals(allExpectedCounts.get(CP_B), result.value);
    prevValue = Integer.MAX_VALUE;
    for (LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_B + "/" + labelValue.label), labelValue.value);
      assertTrue("wrong sort order of sub results: labelValue.value=" + labelValue.value
          + " prevValue=" + prevValue, labelValue.value.intValue() <= prevValue);
      prevValue = labelValue.value.intValue();
    }

    IOUtils.close(indexReader, taxoReader);
  }

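  // requesting far more children than exist (Integer.MAX_VALUE) must not change the counts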
  @Test
  public void testBigNumResults() throws Exception {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
    FacetsCollector sfc = new FacetsCollector();
    searcher.search(new MatchAllDocsQuery(), sfc);
    Facets facets = getTaxonomyFacetCounts(taxoReader, getConfig(), sfc);

    FacetResult result = facets.getTopChildren(Integer.MAX_VALUE, CP_A);
    assertEquals(-1, result.value.intValue());
    for (LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_A + "/" + labelValue.label), labelValue.value);
    }
    result = facets.getTopChildren(Integer.MAX_VALUE, CP_B);
    assertEquals(allExpectedCounts.get(CP_B), result.value);
    for (LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_B + "/" + labelValue.label), labelValue.value);
    }

    IOUtils.close(indexReader, taxoReader);
  }

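  // verifies dimension totals and per-child counts for the NO_PARENTS dimensions C and D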
  @Test
  public void testNoParents() throws Exception {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
    FacetsCollector sfc = new FacetsCollector();
    searcher.search(new MatchAllDocsQuery(), sfc);
    Facets facets = getTaxonomyFacetCounts(taxoReader, getConfig(), sfc);

    FacetResult result = facets.getTopChildren(NUM_CHILDREN_CP_C, CP_C);
    assertEquals(allExpectedCounts.get(CP_C), result.value);
    for (LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_C + "/" + labelValue.label), labelValue.value);
    }
    result = facets.getTopChildren(NUM_CHILDREN_CP_D, CP_D);
    assertEquals(allExpectedCounts.get(CP_D), result.value);
    for (LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_D + "/" + labelValue.label), labelValue.value);
    }

    IOUtils.close(indexReader, taxoReader);
  }
}