// blob: 61a572299fe7d76101458d1afc53df56bb8bae34 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.grouping;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NavigableSet;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
/**
 * Builds a small hotel/airport/duration index in several steps and, after each
 * reader reopen, checks the grouped facet counts of the airport and duration
 * facets. Documents are grouped by hotel, so a facet value is counted once per
 * distinct hotel in which it occurs.
 */
public void testSimple() throws Exception {
final String groupField = "hotel";
FieldType customType = new FieldType();
customType.setStored(true);
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
// Always true in this test; addField asserts it and always writes "<field>_dv" doc values.
boolean useDv = true;
// Doc 0: hotel a, airport ams, duration 5
Document doc = new Document();
addField(doc, groupField, "a", useDv);
addField(doc, "airport", "ams", useDv);
addField(doc, "duration", "5", useDv);
w.addDocument(doc);
// Doc 1: hotel a, airport dus, duration 10
doc = new Document();
addField(doc, groupField, "a", useDv);
addField(doc, "airport", "dus", useDv);
addField(doc, "duration", "10", useDv);
w.addDocument(doc);
// Doc 2: hotel b, airport ams, duration 10
doc = new Document();
addField(doc, groupField, "b", useDv);
addField(doc, "airport", "ams", useDv);
addField(doc, "duration", "10", useDv);
w.addDocument(doc);
w.commit(); // To ensure a second segment
// Doc 3: hotel b, airport ams, duration 5
doc = new Document();
addField(doc, groupField, "b", useDv);
addField(doc, "airport", "ams", useDv);
addField(doc, "duration", "5", useDv);
w.addDocument(doc);
// Doc 4: hotel b, airport ams, duration 5 (same values as doc 3)
doc = new Document();
addField(doc, groupField, "b", useDv);
addField(doc, "airport", "ams", useDv);
addField(doc, "duration", "5", useDv);
w.addDocument(doc);
IndexSearcher indexSearcher = newSearcher(w.getReader());
List<TermGroupFacetCollector.FacetEntry> entries;
GroupFacetCollector groupedAirportFacetCollector;
TermGroupFacetCollector.GroupedFacetResult airportResult;
for (int limit : new int[] { 2, 10, 100, Integer.MAX_VALUE }) {
// any of these limits is plenty for the data we have
groupedAirportFacetCollector = createRandomCollector
(useDv ? "hotel_dv" : "hotel",
useDv ? "airport_dv" : "airport", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
int maxOffset = 5;
// Avoid int overflow: only add maxOffset when limit is not already Integer.MAX_VALUE.
airportResult = groupedAirportFacetCollector.mergeSegmentResults
(Integer.MAX_VALUE == limit ? limit : maxOffset + limit, 0, false);
// Docs 0-4: hotel a has {ams, dus}, hotel b has {ams} => ams=2, dus=1, total 3.
assertEquals(3, airportResult.getTotalCount());
assertEquals(0, airportResult.getTotalMissingCount());
// An offset past the last entry yields an empty page.
entries = airportResult.getFacetEntries(maxOffset, limit);
assertEquals(0, entries.size());
entries = airportResult.getFacetEntries(0, limit);
assertEquals(2, entries.size());
assertEquals("ams", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
assertEquals("dus", entries.get(1).getValue().utf8ToString());
assertEquals(1, entries.get(1).getCount());
// Offset 1 skips "ams" and leaves only "dus".
entries = airportResult.getFacetEntries(1, limit);
assertEquals(1, entries.size());
assertEquals("dus", entries.get(0).getValue().utf8ToString());
assertEquals(1, entries.get(0).getCount());
}
GroupFacetCollector groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
TermGroupFacetCollector.GroupedFacetResult durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, false);
// Docs 0-4: both hotels have durations {5, 10} => "10"=2, "5"=2, total 4.
assertEquals(4, durationResult.getTotalCount());
assertEquals(0, durationResult.getTotalMissingCount());
entries = durationResult.getFacetEntries(0, 10);
assertEquals(2, entries.size());
assertEquals("10", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
assertEquals("5", entries.get(1).getValue().utf8ToString());
assertEquals(2, entries.get(1).getCount());
// Doc 5: hotel b, duration 5, airport "missing"
doc = new Document();
addField(doc, groupField, "b", useDv);
// missing airport: with doc values an empty-string value is written instead,
// so the assertions below expect "" as a regular facet value, not a missing one
if (useDv) {
addField(doc, "airport", "", useDv);
}
addField(doc, "duration", "5", useDv);
w.addDocument(doc);
// Doc 6: hotel b, airport bru, duration 10
doc = new Document();
addField(doc, groupField, "b", useDv);
addField(doc, "airport", "bru", useDv);
addField(doc, "duration", "10", useDv);
w.addDocument(doc);
// Doc 7: hotel b, airport bru, duration 15
doc = new Document();
addField(doc, groupField, "b", useDv);
addField(doc, "airport", "bru", useDv);
addField(doc, "duration", "15", useDv);
w.addDocument(doc);
// Doc 8: hotel a, airport bru, duration 10
doc = new Document();
addField(doc, groupField, "a", useDv);
addField(doc, "airport", "bru", useDv);
addField(doc, "duration", "10", useDv);
w.addDocument(doc);
// Reopen the reader so docs 5-8 become visible.
indexSearcher.getIndexReader().close();
indexSearcher = newSearcher(w.getReader());
groupedAirportFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, !useDv);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
// Keep the top 3 entries, no minimum count, ordered by count.
airportResult = groupedAirportFacetCollector.mergeSegmentResults(3, 0, true);
entries = airportResult.getFacetEntries(1, 2);
assertEquals(2, entries.size());
if (useDv) {
// Docs 0-8: ams=2, bru=2, dus=1, ""=1 => total 6; the page starting at offset 1 is bru, "".
assertEquals(6, airportResult.getTotalCount());
assertEquals(0, airportResult.getTotalMissingCount());
assertEquals("bru", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
assertEquals("", entries.get(1).getValue().utf8ToString());
assertEquals(1, entries.get(1).getCount());
} else {
// Not taken while useDv is hard-coded to true.
assertEquals(5, airportResult.getTotalCount());
assertEquals(1, airportResult.getTotalMissingCount());
assertEquals("bru", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
assertEquals("dus", entries.get(1).getValue().utf8ToString());
assertEquals(1, entries.get(1).getCount());
}
groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
// Minimum count 2, ordered by count.
durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 2, true);
// Docs 0-8: "10"=2, "5"=2, "15"=1 => total 5; "15" is below the minimum count.
assertEquals(5, durationResult.getTotalCount());
assertEquals(0, durationResult.getTotalMissingCount());
entries = durationResult.getFacetEntries(1, 1);
assertEquals(1, entries.size());
assertEquals("5", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
// Doc 9: hotel c, airport bru, duration 15
doc = new Document();
addField(doc, groupField, "c", useDv);
addField(doc, "airport", "bru", useDv);
addField(doc, "duration", "15", useDv);
w.addDocument(doc);
// Doc 10: hotel c, airport dus, duration 10
doc = new Document();
addField(doc, groupField, "c", useDv);
addField(doc, "airport", "dus", useDv);
addField(doc, "duration", "10", useDv);
w.addDocument(doc);
// Reopen the reader so docs 9-10 become visible.
indexSearcher.getIndexReader().close();
indexSearcher = newSearcher(w.getReader());
groupedAirportFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
entries = airportResult.getFacetEntries(0, 10);
if (useDv) {
// Docs 0-10: ""=1, ams=2, bru=3, dus=2 => total 8, listed in value order.
assertEquals(8, airportResult.getTotalCount());
assertEquals(0, airportResult.getTotalMissingCount());
assertEquals(4, entries.size());
assertEquals("", entries.get(0).getValue().utf8ToString());
assertEquals(1, entries.get(0).getCount());
assertEquals("ams", entries.get(1).getValue().utf8ToString());
assertEquals(2, entries.get(1).getCount());
assertEquals("bru", entries.get(2).getValue().utf8ToString());
assertEquals(3, entries.get(2).getCount());
assertEquals("dus", entries.get(3).getValue().utf8ToString());
assertEquals(2, entries.get(3).getCount());
} else {
// Not taken while useDv is hard-coded to true.
assertEquals(7, airportResult.getTotalCount());
assertEquals(1, airportResult.getTotalMissingCount());
assertEquals(3, entries.size());
assertEquals("ams", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
assertEquals("bru", entries.get(1).getValue().utf8ToString());
assertEquals(3, entries.get(1).getCount());
assertEquals("dus", entries.get(2).getValue().utf8ToString());
assertEquals(2, entries.get(2).getCount());
}
// Facet prefix "1" restricts the duration facet to "10" and "15".
groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", "1", false);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, true);
// Docs 0-10: "10" occurs in hotels a, b, c and "15" in hotels b, c => total 5.
assertEquals(5, durationResult.getTotalCount());
assertEquals(0, durationResult.getTotalMissingCount());
entries = durationResult.getFacetEntries(0, 10);
assertEquals(2, entries.size());
assertEquals("10", entries.get(0).getValue().utf8ToString());
assertEquals(3, entries.get(0).getCount());
assertEquals("15", entries.get(1).getValue().utf8ToString());
assertEquals(2, entries.get(1).getCount());
w.close();
indexSearcher.getIndexReader().close();
dir.close();
}
/**
 * Checks grouped faceting on a multi-valued (SortedSetDocValuesField) airport
 * facet over an index built with NoMergePolicy, several commits and a
 * delete-by-query, including documents that carry neither a group nor a facet value.
 */
public void testMVGroupedFacetingWithDeletes() throws Exception {
final String groupField = "hotel";
FieldType customType = new FieldType();
customType.setStored(true);
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
// Always true in this test; addField asserts it and always writes "<field>_dv" doc values.
boolean useDv = true;
// Cannot assert this since we use NoMergePolicy:
w.setDoRandomForceMergeAssert(false);
// Doc 0: only the unrelated field "x" (no hotel, no airport)
Document doc = new Document();
doc.add(new StringField("x", "x", Field.Store.NO));
w.addDocument(doc);
// Doc 1: hotel a, airport ams
doc = new Document();
addField(doc, groupField, "a", useDv);
doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
w.addDocument(doc);
w.commit();
// NOTE(review): "airport" is only added as a doc-values field in this method, so this
// term query presumably matches no document - confirm whether a delete is still exercised.
w.deleteDocuments(new TermQuery(new Term("airport", "ams")));
// Doc 2: hotel a, airport ams
doc = new Document();
addField(doc, groupField, "a", useDv);
doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
w.addDocument(doc);
// Doc 3: hotel a, airport dus
doc = new Document();
addField(doc, groupField, "a", useDv);
doc.add(new SortedSetDocValuesField("airport", new BytesRef("dus")));
w.addDocument(doc);
// Doc 4: hotel b, airport ams
doc = new Document();
addField(doc, groupField, "b", useDv);
doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
w.addDocument(doc);
// Doc 5: hotel b, airport ams
doc = new Document();
addField(doc, groupField, "b", useDv);
doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
w.addDocument(doc);
// Doc 6: hotel b, airport ams
doc = new Document();
addField(doc, groupField, "b", useDv);
doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
w.addDocument(doc);
w.commit();
// Doc 7: only the unrelated field "x" (no hotel, no airport)
doc = new Document();
doc.add(new StringField("x", "x", Field.Store.NO));
w.addDocument(doc);
w.commit();
w.close();
IndexSearcher indexSearcher = newSearcher(DirectoryReader.open(dir));
// Last argument true: the facet field is treated as multi-valued.
GroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField + "_dv", "airport", null, true);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
// Hotel a has {ams, dus}, hotel b has {ams} => ams=2, dus=1, total 3.
assertEquals(3, airportResult.getTotalCount());
// Presumably the docs without hotel and airport (0 and 7) count once as missing - TODO confirm.
assertEquals(1, airportResult.getTotalMissingCount());
List<TermGroupFacetCollector.FacetEntry> entries = airportResult.getFacetEntries(0, 10);
assertEquals(2, entries.size());
assertEquals("ams", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
assertEquals("dus", entries.get(1).getValue().utf8ToString());
assertEquals(1, entries.get(1).getCount());
indexSearcher.getIndexReader().close();
dir.close();
}
/**
 * Adds {@code value} to {@code doc} as a {@link SortedDocValuesField} whose
 * name is {@code field} with the suffix {@code "_dv"}.
 *
 * @param doc       document that receives the field
 * @param field     base field name; the stored field is named {@code field + "_dv"}
 * @param value     value written as the field's bytes
 * @param canUseIDV must be {@code true}; checked with an {@code assert}
 */
private void addField(Document doc, String field, String value, boolean canUseIDV) {
  assert canUseIDV;
  final String docValuesName = field + "_dv";
  final BytesRef docValuesBytes = new BytesRef(value);
  doc.add(new SortedDocValuesField(docValuesName, docValuesBytes));
}
/**
 * Randomized test: for each randomly built index (see {@code createIndexContext})
 * runs 100 term searches with random offset, limit, minimum count, ordering and
 * facet prefix, and compares the collector's merged result against the result
 * computed by {@code createExpectedFacetResult}.
 */
public void testRandom() throws Exception {
  Random random = random();
  int numberOfRuns = atLeast(1);
  for (int indexIter = 0; indexIter < numberOfRuns; indexIter++) {
    boolean multipleFacetsPerDocument = random.nextBoolean();
    IndexContext context = createIndexContext(multipleFacetsPerDocument);
    final IndexSearcher searcher = newSearcher(context.indexReader);

    if (VERBOSE) {
      System.out.println("TEST: searcher=" + searcher);
    }

    for (int searchIter = 0; searchIter < 100; searchIter++) {
      if (VERBOSE) {
        System.out.println("TEST: searchIter=" + searchIter);
      }
      String searchTerm = context.contentStrings[random.nextInt(context.contentStrings.length)];
      // limit < size, so (size - limit) >= 1 and is a valid bound for nextInt.
      int limit = random.nextInt(context.facetValues.size());
      int offset = random.nextInt(context.facetValues.size() - limit);
      int size = offset + limit;
      int minCount = random.nextBoolean() ? 0 : random.nextInt(1 + context.facetWithMostGroups / 10);
      boolean orderByCount = random.nextBoolean();
      String randomStr = getFromSet(context.facetValues, random.nextInt(context.facetValues.size()));
      final String facetPrefix;
      if (randomStr == null) {
        facetPrefix = null;
      } else {
        int codePointLen = randomStr.codePointCount(0, randomStr.length());
        // randomLen is always in [0, codePointLen), so the former
        // "codePointLen == randomLen - 1" branch could never be taken and was dropped.
        int randomLen = random.nextInt(codePointLen);
        int end = randomStr.offsetByCodePoints(0, randomLen);
        // NOTE(review): substring(end) is the tail of randomStr starting at code point
        // randomLen, not its leading part - confirm this is the intended "prefix".
        facetPrefix = random.nextBoolean() ? null : randomStr.substring(end);
      }

      GroupedFacetResult expectedFacetResult = createExpectedFacetResult(searchTerm, context, offset, limit, minCount, orderByCount, facetPrefix);
      GroupFacetCollector groupFacetCollector = createRandomCollector("group", "facet", facetPrefix, multipleFacetsPerDocument);
      searcher.search(new TermQuery(new Term("content", searchTerm)), groupFacetCollector);
      TermGroupFacetCollector.GroupedFacetResult actualFacetResult = groupFacetCollector.mergeSegmentResults(size, minCount, orderByCount);

      List<TermGroupFacetCollector.FacetEntry> expectedFacetEntries = expectedFacetResult.getFacetEntries();
      List<TermGroupFacetCollector.FacetEntry> actualFacetEntries = actualFacetResult.getFacetEntries(offset, limit);

      if (VERBOSE) {
        System.out.println("Collector: " + groupFacetCollector.getClass().getSimpleName());
        System.out.println("Num group: " + context.numGroups);
        System.out.println("Num doc: " + context.numDocs);
        System.out.println("Index iter: " + indexIter);
        System.out.println("multipleFacetsPerDocument: " + multipleFacetsPerDocument);
        System.out.println("Search iter: " + searchIter);
        System.out.println("Search term: " + searchTerm);
        System.out.println("Min count: " + minCount);
        System.out.println("Facet offset: " + offset);
        System.out.println("Facet limit: " + limit);
        System.out.println("Facet prefix: " + facetPrefix);
        System.out.println("Order by count: " + orderByCount);

        System.out.println("\n=== Expected: \n");
        System.out.println("Total count " + expectedFacetResult.getTotalCount());
        System.out.println("Total missing count " + expectedFacetResult.getTotalMissingCount());
        int counter = 0;
        for (TermGroupFacetCollector.FacetEntry expectedFacetEntry : expectedFacetEntries) {
          System.out.println(
              String.format(Locale.ROOT,
                  "%d. Expected facet value %s with count %d",
                  counter++, expectedFacetEntry.getValue().utf8ToString(), expectedFacetEntry.getCount()
              )
          );
        }

        System.out.println("\n=== Actual: \n");
        System.out.println("Total count " + actualFacetResult.getTotalCount());
        System.out.println("Total missing count " + actualFacetResult.getTotalMissingCount());
        counter = 0;
        for (TermGroupFacetCollector.FacetEntry actualFacetEntry : actualFacetEntries) {
          System.out.println(
              String.format(Locale.ROOT,
                  "%d. Actual facet value %s with count %d",
                  counter++, actualFacetEntry.getValue().utf8ToString(), actualFacetEntry.getCount()
              )
          );
        }
        System.out.println("\n===================================================================================");
      }

      assertEquals(expectedFacetResult.getTotalCount(), actualFacetResult.getTotalCount());
      assertEquals(expectedFacetResult.getTotalMissingCount(), actualFacetResult.getTotalMissingCount());
      assertEquals(expectedFacetEntries.size(), actualFacetEntries.size());
      for (int i = 0; i < expectedFacetEntries.size(); i++) {
        TermGroupFacetCollector.FacetEntry expectedFacetEntry = expectedFacetEntries.get(i);
        TermGroupFacetCollector.FacetEntry actualFacetEntry = actualFacetEntries.get(i);
        assertEquals("i=" + i + ": " + expectedFacetEntry.getValue().utf8ToString() + " != " + actualFacetEntry.getValue().utf8ToString(), expectedFacetEntry.getValue(), actualFacetEntry.getValue());
        assertEquals("i=" + i + ": " + expectedFacetEntry.getCount() + " != " + actualFacetEntry.getCount(), expectedFacetEntry.getCount(), actualFacetEntry.getCount());
      }
    }

    context.indexReader.close();
    context.dir.close();
  }
}
/**
 * Builds a random index together with the bookkeeping needed to compute expected
 * grouped facet results.
 *
 * <p>Every document gets a "content" term, a "group" value and either one facet
 * value or several facet values. While indexing, the method records for each
 * content term which groups were seen per facet value.
 *
 * @param multipleFacetValuesPerDocument when {@code false} the "facet" field is written as a
 *        string field plus a {@link SortedDocValuesField}; otherwise as 2 to 7
 *        {@link SortedSetDocValuesField} instances per document
 * @return the index reader, directory and expected-result bookkeeping; the caller closes the
 *         reader and directory
 * @throws IOException if indexing fails
 */
private IndexContext createIndexContext(boolean multipleFacetValuesPerDocument) throws IOException {
  final Random random = random();
  final int numDocs = TestUtil.nextInt(random, 138, 1145) * RANDOM_MULTIPLIER;
  final int numGroups = TestUtil.nextInt(random, 1, numDocs / 4);
  final int numFacets = TestUtil.nextInt(random, 1, numDocs / 6);

  if (VERBOSE) {
    System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
  }

  final List<String> groups = new ArrayList<>();
  for (int i = 0; i < numGroups; i++) {
    groups.add(generateRandomNonEmptyString());
  }
  final List<String> facetValues = new ArrayList<>();
  for (int i = 0; i < numFacets; i++) {
    facetValues.add(generateRandomNonEmptyString());
  }
  final String[] contentBrs = new String[TestUtil.nextInt(random, 2, 20)];
  if (VERBOSE) {
    System.out.println("TEST: create fake content");
  }
  for (int contentIDX = 0; contentIDX < contentBrs.length; contentIDX++) {
    contentBrs[contentIDX] = generateRandomNonEmptyString();
    if (VERBOSE) {
      System.out.println(" content=" + contentBrs[contentIDX]);
    }
  }

  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(
      random,
      dir,
      newIndexWriterConfig(new MockAnalyzer(random))
  );

  // The Field instances below are shared by the documents and reused for every indexed
  // document: their values are overwritten in the loop before each addDocument call.
  Document doc = new Document();
  Document docNoGroup = new Document();
  Document docNoFacet = new Document();
  Document docNoGroupNoFacet = new Document();
  Field group = newStringField("group", "", Field.Store.NO);
  Field groupDc = new SortedDocValuesField("group", new BytesRef());
  doc.add(groupDc);
  docNoFacet.add(groupDc);
  doc.add(group);
  docNoFacet.add(group);
  Field[] facetFields;
  if (multipleFacetValuesPerDocument == false) {
    facetFields = new Field[2];
    facetFields[0] = newStringField("facet", "", Field.Store.NO);
    doc.add(facetFields[0]);
    docNoGroup.add(facetFields[0]);
    facetFields[1] = new SortedDocValuesField("facet", new BytesRef());
    doc.add(facetFields[1]);
    docNoGroup.add(facetFields[1]);
  } else {
    // Multi-valued case: between 2 and 7 facet values per document.
    facetFields = new Field[2 + random.nextInt(6)];
    for (int i = 0; i < facetFields.length; i++) {
      facetFields[i] = new SortedSetDocValuesField("facet", new BytesRef());
      doc.add(facetFields[i]);
      docNoGroup.add(facetFields[i]);
    }
  }
  Field content = newStringField("content", "", Field.Store.NO);
  doc.add(content);
  docNoGroup.add(content);
  docNoFacet.add(content);
  docNoGroupNoFacet.add(content);

  // Null-tolerant ordering: null sorts before every non-null string.
  NavigableSet<String> uniqueFacetValues = new TreeSet<>(new Comparator<String>() {
    @Override
    public int compare(String a, String b) {
      if (a == b) {
        return 0;
      } else if (a == null) {
        return -1;
      } else if (b == null) {
        return 1;
      } else {
        return a.compareTo(b);
      }
    }
  });
  Map<String, Map<String, Set<String>>> searchTermToFacetToGroups = new HashMap<>();
  int facetWithMostGroups = 0;
  for (int i = 0; i < numDocs; i++) {
    final String groupValue;
    if (random.nextInt(24) == 17) {
      // So we test the "doc doesn't have the group'd
      // field" case (represented by the empty string):
      groupValue = "";
    } else {
      groupValue = groups.get(random.nextInt(groups.size()));
    }

    String contentStr = contentBrs[random.nextInt(contentBrs.length)];
    Map<String, Set<String>> facetToGroups = searchTermToFacetToGroups.get(contentStr);
    if (facetToGroups == null) {
      facetToGroups = new HashMap<String, Set<String>>();
      searchTermToFacetToGroups.put(contentStr, facetToGroups);
    }

    List<String> facetVals = new ArrayList<>();
    if (multipleFacetValuesPerDocument == false) {
      String facetValue = facetValues.get(random.nextInt(facetValues.size()));
      uniqueFacetValues.add(facetValue);
      facetWithMostGroups = Math.max(facetWithMostGroups, recordGroupForFacet(facetToGroups, facetValue, groupValue));
      facetFields[0].setStringValue(facetValue);
      facetFields[1].setBytesValue(new BytesRef(facetValue));
      facetVals.add(facetValue);
    } else {
      for (Field facetField : facetFields) {
        String facetValue = facetValues.get(random.nextInt(facetValues.size()));
        uniqueFacetValues.add(facetValue);
        facetWithMostGroups = Math.max(facetWithMostGroups, recordGroupForFacet(facetToGroups, facetValue, groupValue));
        facetField.setBytesValue(new BytesRef(facetValue));
        facetVals.add(facetValue);
      }
    }

    if (VERBOSE) {
      System.out.println(" doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + facetVals);
    }

    // NOTE(review): groupValue is assigned either "" or an element of groups above, so it is
    // presumably never null and the null branches below look unreachable - confirm.
    if (groupValue != null) {
      groupDc.setBytesValue(new BytesRef(groupValue));
      group.setStringValue(groupValue);
    } else {
      // TODO: not true
      // DV cannot have missing values:
      groupDc.setBytesValue(new BytesRef());
    }
    content.setStringValue(contentStr);
    // NOTE(review): facetVals always receives at least one value above, so only the final
    // branch (the full document) appears to be taken - confirm.
    if (groupValue == null && facetVals.isEmpty()) {
      writer.addDocument(docNoGroupNoFacet);
    } else if (facetVals.isEmpty()) {
      writer.addDocument(docNoFacet);
    } else if (groupValue == null) {
      writer.addDocument(docNoGroup);
    } else {
      writer.addDocument(doc);
    }
  }

  DirectoryReader reader = writer.getReader();
  writer.close();

  return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues);
}

/**
 * Registers {@code groupValue} under {@code facetValue} in {@code facetToGroups}, creating the
 * group set on first use.
 *
 * @return the number of distinct groups now recorded for {@code facetValue}
 */
private static int recordGroupForFacet(Map<String, Set<String>> facetToGroups, String facetValue, String groupValue) {
  Set<String> groupsInFacet = facetToGroups.get(facetValue);
  if (groupsInFacet == null) {
    groupsInFacet = new HashSet<String>();
    facetToGroups.put(facetValue, groupsInFacet);
  }
  groupsInFacet.add(groupValue);
  return groupsInFacet.size();
}
/**
 * Computes the facet result the collector is expected to produce for one search term,
 * from the bookkeeping gathered while the index was built.
 *
 * @param searchTerm   content term that was searched
 * @param context      index bookkeeping (facet values and groups per search term)
 * @param offset       index of the first entry to return
 * @param limit        maximum number of entries to return
 * @param minCount     entries with a smaller group count are left out of the entry list
 *                     (they still contribute to the total count)
 * @param orderByCount if {@code true} entries are ordered by descending count, ties broken
 *                     by value; otherwise by value only
 * @param facetPrefix  if non-null, only facet values starting with it are considered and the
 *                     missing count stays 0
 * @return total count, missing count and the requested page of entries
 */
private GroupedFacetResult createExpectedFacetResult(String searchTerm, IndexContext context, int offset, int limit, int minCount, final boolean orderByCount, String facetPrefix) {
  Map<String, Set<String>> facetGroups = context.searchTermToFacetGroups.get(searchTerm);
  if (facetGroups == null) {
    facetGroups = new HashMap<>();
  }

  int totalCount = 0;
  int totalMissCount = 0;
  Set<String> facetValues;
  if (facetPrefix != null) {
    facetValues = new HashSet<>();
    for (String facetValue : context.facetValues) {
      if (facetValue != null && facetValue.startsWith(facetPrefix)) {
        facetValues.add(facetValue);
      }
    }
  } else {
    facetValues = context.facetValues;
  }

  List<TermGroupFacetCollector.FacetEntry> entries = new ArrayList<>(facetGroups.size());
  // also includes facets with count 0
  for (String facetValue : facetValues) {
    if (facetValue == null) {
      continue;
    }

    Set<String> groups = facetGroups.get(facetValue);
    int count = groups != null ? groups.size() : 0;
    if (count >= minCount) {
      entries.add(new TermGroupFacetCollector.FacetEntry(new BytesRef(facetValue), count));
    }
    // The total is accumulated regardless of minCount.
    totalCount += count;
  }

  // Only include null count when no facet prefix is specified
  if (facetPrefix == null) {
    Set<String> groups = facetGroups.get(null);
    if (groups != null) {
      totalMissCount = groups.size();
    }
  }

  Collections.sort(entries, new Comparator<TermGroupFacetCollector.FacetEntry>() {
    @Override
    public int compare(TermGroupFacetCollector.FacetEntry a, TermGroupFacetCollector.FacetEntry b) {
      if (orderByCount) {
        // Descending by count; Integer.compare avoids subtraction-based comparison.
        int cmp = Integer.compare(b.getCount(), a.getCount());
        if (cmp != 0) {
          return cmp;
        }
      }
      return a.getValue().compareTo(b.getValue());
    }
  });

  // Cut out the requested page, clamping the end to the number of entries.
  int endOffset = offset + limit;
  List<TermGroupFacetCollector.FacetEntry> entriesResult;
  if (offset >= entries.size()) {
    entriesResult = Collections.emptyList();
  } else {
    entriesResult = entries.subList(offset, Math.min(endOffset, entries.size()));
  }
  return new GroupedFacetResult(totalCount, totalMissCount, entriesResult);
}
/**
 * Creates a term-based group facet collector through
 * {@code TermGroupFacetCollector.createTermGroupFacetCollector}.
 *
 * @param groupField                field to group by
 * @param facetField                field to facet on
 * @param facetPrefix               facet prefix, or {@code null} for none
 * @param multipleFacetsPerDocument passed through to the factory unchanged
 * @return the collector; the factory's final argument is a random int in [0, 1024)
 */
private GroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument) {
  final BytesRef prefixBytes;
  if (facetPrefix != null) {
    prefixBytes = new BytesRef(facetPrefix);
  } else {
    prefixBytes = null;
  }
  final int randomLastArg = random().nextInt(1024);
  return TermGroupFacetCollector.createTermGroupFacetCollector(
      groupField, facetField, multipleFacetsPerDocument, prefixBytes, randomLastArg);
}
/**
 * Returns the element at position {@code index} in the iteration order of {@code set}.
 *
 * @param set   set to walk
 * @param index zero-based position in iteration order
 * @return the element at that position, or {@code null} when no element has that position
 */
private String getFromSet(Set<String> set, int index) {
  int stepsLeft = index;
  for (String candidate : set) {
    if (stepsLeft == 0) {
      return candidate;
    }
    stepsLeft--;
  }
  return null;
}
/**
 * Immutable holder for a randomly built index and the bookkeeping that the
 * expected-result computation reads.
 */
private static class IndexContext {

  /** Search term -> facet value -> groups recorded for that facet value. */
  final Map<String, Map<String, Set<String>>> searchTermToFacetGroups;
  /** Facet values handed to the constructor. */
  final NavigableSet<String> facetValues;
  /** Content strings handed to the constructor. */
  final String[] contentStrings;

  final DirectoryReader indexReader;
  final Directory dir;

  final int numDocs;
  final int numGroups;
  /** Largest number of groups recorded for a single facet value. */
  final int facetWithMostGroups;

  public IndexContext(Map<String, Map<String, Set<String>>> searchTermToFacetGroups, DirectoryReader r,
      int numDocs, Directory dir, int facetWithMostGroups, int numGroups, String[] contentStrings, NavigableSet<String> facetValues) {
    this.searchTermToFacetGroups = searchTermToFacetGroups;
    this.facetValues = facetValues;
    this.contentStrings = contentStrings;
    this.indexReader = r;
    this.dir = dir;
    this.numDocs = numDocs;
    this.numGroups = numGroups;
    this.facetWithMostGroups = facetWithMostGroups;
  }
}
/**
 * Expected-result counterpart of {@code TermGroupFacetCollector.GroupedFacetResult}:
 * a total count, a missing count and an already paged list of facet entries.
 */
private static class GroupedFacetResult {

  final List<TermGroupFacetCollector.FacetEntry> facetEntries;
  final int totalMissingCount;
  final int totalCount;

  private GroupedFacetResult(int totalCount, int totalMissingCount, List<TermGroupFacetCollector.FacetEntry> facetEntries) {
    this.facetEntries = facetEntries;
    this.totalMissingCount = totalMissingCount;
    this.totalCount = totalCount;
  }

  /** @return the facet entries handed to the constructor */
  public List<TermGroupFacetCollector.FacetEntry> getFacetEntries() {
    return this.facetEntries;
  }

  /** @return the total count handed to the constructor */
  public int getTotalCount() {
    return this.totalCount;
  }

  /** @return the missing count handed to the constructor */
  public int getTotalMissingCount() {
    return this.totalMissingCount;
  }
}
}