blob: 2fa8b60d4712198551589d0a0396cc9c4628aadd [file] [log] [blame]
package org.apache.lucene.search.grouping.term;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.grouping.AbstractDistinctValuesCollector;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SentinelIntSet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
/**
* A term based implementation of {@link org.apache.lucene.search.grouping.AbstractDistinctValuesCollector} that relies
* on {@link SortedDocValues} to count the distinct values per group.
*
* @lucene.experimental
*/
public class TermDistinctValuesCollector extends AbstractDistinctValuesCollector<TermDistinctValuesCollector.GroupCount> {

  private final String groupField;
  private final String countField;
  private final List<GroupCount> groups;
  // Holds the group-field ordinals of the tracked groups for the current segment.
  private final SentinelIntSet ordSet;
  // Parallel to ordSet.keys: groupCounts[slot] is the group whose ordinal sits in that slot.
  private final GroupCount[] groupCounts;

  private SortedDocValues groupFieldTermIndex;
  private SortedDocValues countFieldTermIndex;

  /**
   * Constructs {@link TermDistinctValuesCollector} instance.
   *
   * @param groupField The field to group by
   * @param countField The field to count distinct values for
   * @param groups The top N groups, collected during the first phase search
   */
  public TermDistinctValuesCollector(String groupField, String countField, Collection<SearchGroup<BytesRef>> groups) {
    this.groupField = groupField;
    this.countField = countField;
    this.groups = new ArrayList<>(groups.size());
    for (SearchGroup<BytesRef> group : groups) {
      this.groups.add(new GroupCount(group.groupValue));
    }
    // -2 is handed to the set as its sentinel; -1 has to stay usable as a key because it is
    // the ordinal this collector uses for the "no value" (null) group.
    ordSet = new SentinelIntSet(groups.size(), -2);
    groupCounts = new GroupCount[ordSet.keys.length];
  }

  /**
   * Records the count-field value of {@code doc} for the group the document belongs to.
   * Documents whose group ordinal is not in the tracked set are ignored.
   *
   * @param doc segment-relative document id
   * @throws IOException declared by the collector contract
   */
  @Override
  public void collect(int doc) throws IOException {
    int slot = ordSet.find(groupFieldTermIndex.getOrd(doc));
    if (slot < 0) {
      // Not one of the groups this collector was asked to track.
      return;
    }

    GroupCount gc = groupCounts[slot];
    int countOrd = countFieldTermIndex.getOrd(doc);
    if (doesNotContainOrd(countOrd, gc.ords)) {
      if (countOrd == -1) {
        // Ordinal -1 means the document has no value in the count field.
        gc.uniqueValues.add(null);
      } else {
        BytesRef br = new BytesRef();
        countFieldTermIndex.lookupOrd(countOrd, br);
        gc.uniqueValues.add(br);
      }

      // Append the new ordinal and restore ascending order so that the next
      // binary search over gc.ords remains valid.
      gc.ords = Arrays.copyOf(gc.ords, gc.ords.length + 1);
      gc.ords[gc.ords.length - 1] = countOrd;
      if (gc.ords.length > 1) {
        Arrays.sort(gc.ords);
      }
    }
  }

  /**
   * Tells whether {@code ord} is absent from {@code ords}.
   *
   * @param ord the ordinal to look for
   * @param ords ordinals already seen, sorted in ascending order
   * @return {@code true} if {@code ord} does not occur in {@code ords}
   */
  private boolean doesNotContainOrd(int ord, int[] ords) {
    // Binary search copes with empty and single-element arrays, so no special cases are needed.
    return Arrays.binarySearch(ords, ord) < 0;
  }

  /**
   * Returns the tracked groups together with the distinct values collected for each of them.
   *
   * @return the list of groups built in the constructor (the live list, not a copy)
   */
  @Override
  public List<GroupCount> getGroups() {
    return groups;
  }

  /**
   * Switches to the doc values of the given segment and re-expresses the collector state
   * (tracked groups and already collected distinct values) in that segment's ordinals.
   *
   * @param context the segment about to be collected
   * @throws IOException if the doc values cannot be obtained
   */
  @Override
  protected void doSetNextReader(AtomicReaderContext context) throws IOException {
    groupFieldTermIndex = DocValues.getSorted(context.reader(), groupField);
    countFieldTermIndex = DocValues.getSorted(context.reader(), countField);
    ordSet.clear();

    for (GroupCount group : groups) {
      int groupOrd = group.groupValue == null ? -1 : groupFieldTermIndex.lookupTerm(group.groupValue);
      if (group.groupValue != null && groupOrd < 0) {
        // The group term does not occur in this segment, so no document here can belong to it.
        continue;
      }

      groupCounts[ordSet.put(groupOrd)] = group;

      // Translate the values collected so far into this segment's ordinals. Values that do
      // not occur in this segment are skipped: no document here can carry them.
      int[] segmentOrds = new int[group.uniqueValues.size()];
      int resolved = 0;
      for (BytesRef value : group.uniqueValues) {
        int countOrd = value == null ? -1 : countFieldTermIndex.lookupTerm(value);
        if (value == null || countOrd >= 0) {
          segmentOrds[resolved++] = countOrd;
        }
      }

      // collect() binary-searches this array, so it must hold only real ordinals in ascending
      // order. The iteration order of uniqueValues gives no such guarantee, hence trim and sort.
      group.ords = Arrays.copyOf(segmentOrds, resolved);
      Arrays.sort(group.ords);
    }
  }

  /** Holds distinct values for a single group.
   *
   * @lucene.experimental */
  public static class GroupCount extends AbstractDistinctValuesCollector.GroupCount<BytesRef> {

    // Count-field ordinals (current segment) of the values in uniqueValues, kept sorted ascending.
    int[] ords;

    GroupCount(BytesRef groupValue) {
      super(groupValue);
    }
  }
}