package org.apache.solr.request;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.FieldType.NumericType;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.StringHelper;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
/** Utility class to compute facets on numeric fields. */
final class NumericFacets {
NumericFacets() {}
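// Open-addressing hash table with linear probing, keyed on the sortable long
// representation of a value. For each distinct value it stores a running count
// and one representative docID (used later to resolve the readable value).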
static class HashTable {
static final float LOAD_FACTOR = 0.7f;
long[] bits; // bits identifying a value
int[] counts;
int[] docIDs;
int mask;
int size;
int threshold;
HashTable() {
final int capacity = 64; // must be a power of 2
bits = new long[capacity];
counts = new int[capacity];
docIDs = new int[capacity];
mask = capacity - 1;
size = 0;
threshold = (int) (capacity * LOAD_FACTOR);
}
private int hash(long v) {
int h = (int) (v ^ (v >>> 32));
h = (31 * h) & mask; // * 31 to try to use the whole table, even if values are dense
return h;
}
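// Adds `count` occurrences of `value`, inserting a new slot (and recording the
// docID) if the value has not been seen yet; collisions are resolved by
// probing to the next free slot.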
void add(int docID, long value, int count) {
if (size >= threshold) {
rehash();
}
final int h = hash(value);
for (int slot = h; ; slot = (slot + 1) & mask) {
if (counts[slot] == 0) {
bits[slot] = value;
docIDs[slot] = docID;
++size;
} else if (bits[slot] != value) {
continue;
}
counts[slot] += count;
break;
}
}
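// Doubles the capacity and re-inserts every live entry; counts and
// representative docIDs are carried over unchanged.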
private void rehash() {
final long[] oldBits = bits;
final int[] oldCounts = counts;
final int[] oldDocIDs = docIDs;
final int newCapacity = bits.length * 2;
bits = new long[newCapacity];
counts = new int[newCapacity];
docIDs = new int[newCapacity];
mask = newCapacity - 1;
threshold = (int) (LOAD_FACTOR * newCapacity);
size = 0;
for (int i = 0; i < oldBits.length; ++i) {
if (oldCounts[i] > 0) {
add(oldDocIDs[i], oldBits[i], oldCounts[i]);
}
}
}
}
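// A candidate facet value as held in the priority queue: the value's sortable
// bits, its count, and a docID through which the readable value can be rendered.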
private static class Entry {
int docID;
int count;
long bits;
}
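/**
 * Computes facet counts for a numeric (trie) field over the given doc set:
 * values are accumulated from the FieldCache into a hash table, the top
 * candidates are selected with a priority queue, and readable values are
 * resolved through the field's ValueSource. When mincount is 0 or less, the
 * result is merged with the terms dictionary so that zero counts can be
 * returned as well.
 *
 * <p>Minimal usage sketch (the field name and parameter values below are
 * illustrative only, not taken from a real call site):
 * <pre>
 *   NamedList&lt;Integer&gt; counts =
 *       NumericFacets.getCounts(searcher, docs, "price", 0, 10, 0, false,
 *           FacetParams.FACET_SORT_COUNT);
 * </pre>
 */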
public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
    int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
final boolean zeros = mincount <= 0;
mincount = Math.max(mincount, 1);
final SchemaField sf = searcher.getSchema().getField(fieldName);
final FieldType ft = sf.getType();
final NumericType numericType = ft.getNumericType();
if (numericType == null) {
throw new IllegalStateException("Field " + fieldName + " is not a numeric field");
}
final List<AtomicReaderContext> leaves = searcher.getIndexReader().leaves();
// 1. accumulate
final HashTable hashTable = new HashTable();
final Iterator<AtomicReaderContext> ctxIt = leaves.iterator();
AtomicReaderContext ctx = null;
FieldCache.Longs longs = null;
Bits docsWithField = null;
int missingCount = 0;
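// Walk the matching docs; leaf readers are only ever advanced forward, which
// relies on the DocSet iterator returning docIDs in increasing order.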
for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
final int doc = docsIt.nextDoc();
if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
do {
ctx = ctxIt.next();
} while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
assert doc >= ctx.docBase;
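// Normalize every numeric type to a long whose ordering matches the value
// ordering, so a single hash table can accumulate all of them.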
switch (numericType) {
case LONG:
longs = FieldCache.DEFAULT.getLongs(ctx.reader(), fieldName, true);
break;
case INT:
final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(ctx.reader(), fieldName, true);
longs = new FieldCache.Longs() {
@Override
public long get(int docID) {
return ints.get(docID);
}
};
break;
case FLOAT:
final FieldCache.Floats floats = FieldCache.DEFAULT.getFloats(ctx.reader(), fieldName, true);
longs = new FieldCache.Longs() {
@Override
public long get(int docID) {
return NumericUtils.floatToSortableInt(floats.get(docID));
}
};
break;
case DOUBLE:
final FieldCache.Doubles doubles = FieldCache.DEFAULT.getDoubles(ctx.reader(), fieldName, true);
longs = new FieldCache.Longs() {
@Override
public long get(int docID) {
return NumericUtils.doubleToSortableLong(doubles.get(docID));
}
};
break;
default:
throw new AssertionError();
}
docsWithField = FieldCache.DEFAULT.getDocsWithField(ctx.reader(), fieldName);
}
long v = longs.get(doc - ctx.docBase);
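// The FieldCache returns 0 for documents without a value, so a zero needs to
// be confirmed against docsWithField before being counted.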
if (v != 0 || docsWithField.get(doc - ctx.docBase)) {
hashTable.add(doc, v, 1);
} else {
++missingCount;
}
}
// 2. select top-k facet values
final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
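// facet.sort=count keeps the entries with the highest counts (ties broken by
// the smaller value); facet.sort=index keeps the smallest values.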
final PriorityQueue<Entry> pq;
if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
pq = new PriorityQueue<Entry>(pqSize) {
@Override
protected boolean lessThan(Entry a, Entry b) {
if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
return true;
} else {
return false;
}
}
};
} else {
pq = new PriorityQueue<Entry>(pqSize) {
@Override
protected boolean lessThan(Entry a, Entry b) {
return a.bits > b.bits;
}
};
}
Entry e = null;
for (int i = 0; i < hashTable.bits.length; ++i) {
if (hashTable.counts[i] >= mincount) {
if (e == null) {
e = new Entry();
}
e.bits = hashTable.bits[i];
e.count = hashTable.counts[i];
e.docID = hashTable.docIDs[i];
e = pq.insertWithOverflow(e);
}
}
// 3. build the NamedList
final ValueSource vs = ft.getValueSource(sf, null);
final NamedList<Integer> result = new NamedList<>();
// This stuff is complicated because if facet.mincount=0, the counts need
// to be merged with terms from the terms dict
if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
// Only keep items we're interested in
final Deque<Entry> counts = new ArrayDeque<>();
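// The queue pops entries worst-first; addFirst reverses that so the deque ends
// up best-first, while the `offset` best entries remain in the queue and are
// skipped.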
while (pq.size() > offset) {
counts.addFirst(pq.pop());
}
// Entries from the PQ first, then using the terms dictionary
for (Entry entry : counts) {
final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
}
if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
if (!sf.indexed()) {
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is not indexed");
}
// Add zeros until there are limit results
final Set<String> alreadySeen = new HashSet<>();
while (pq.size() > 0) {
Entry entry = pq.pop();
final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
}
for (int i = 0; i < result.size(); ++i) {
alreadySeen.add(result.getName(i));
}
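// alreadySeen now holds every value skipped by the offset or already returned.
// Walk the terms dictionary and append the remaining values with a count of 0.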
final Terms terms = searcher.getAtomicReader().terms(fieldName);
if (terms != null) {
final String prefixStr = TrieField.getMainValuePrefix(ft);
final BytesRef prefix;
if (prefixStr != null) {
prefix = new BytesRef(prefixStr);
} else {
prefix = new BytesRef();
}
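// Trie fields index extra lower-precision terms; seeking to the main value
// prefix (and checking it below) restricts the walk to full-precision terms.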
final TermsEnum termsEnum = terms.iterator(null);
BytesRef term;
switch (termsEnum.seekCeil(prefix)) {
case FOUND:
case NOT_FOUND:
term = termsEnum.term();
break;
case END:
term = null;
break;
default:
throw new AssertionError();
}
final CharsRef spare = new CharsRef();
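// If the offset reaches past all distinct non-zero values, the remaining
// positions to skip fall on zero-count terms; count those skips here before
// emitting.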
for (int skipped = hashTable.size; skipped < offset && term != null && StringHelper.startsWith(term, prefix); ) {
ft.indexedToReadable(term, spare);
final String termStr = spare.toString();
if (!alreadySeen.contains(termStr)) {
++skipped;
}
term = termsEnum.next();
}
for ( ; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
ft.indexedToReadable(term, spare);
final String termStr = spare.toString();
if (!alreadySeen.contains(termStr)) {
result.add(termStr, 0);
}
}
}
}
} else {
// sort=index, mincount=0 and we have fewer than limit items
// => Merge the PQ and the terms dictionary on the fly
if (!sf.indexed()) {
throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "=" + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
}
final Map<String, Integer> counts = new HashMap<>();
while (pq.size() > 0) {
final Entry entry = pq.pop();
final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
}
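// counts now maps each readable non-zero value to its count; the terms
// dictionary drives the output so values come back in index order, with 0 for
// values that matched no document.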
final Terms terms = searcher.getAtomicReader().terms(fieldName);
if (terms != null) {
final String prefixStr = TrieField.getMainValuePrefix(ft);
final BytesRef prefix;
if (prefixStr != null) {
prefix = new BytesRef(prefixStr);
} else {
prefix = new BytesRef();
}
final TermsEnum termsEnum = terms.iterator(null);
BytesRef term;
switch (termsEnum.seekCeil(prefix)) {
case FOUND:
case NOT_FOUND:
term = termsEnum.term();
break;
case END:
term = null;
break;
default:
throw new AssertionError();
}
final CharsRef spare = new CharsRef();
for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
term = termsEnum.next();
}
for ( ; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
ft.indexedToReadable(term, spare);
final String termStr = spare.toString();
Integer count = counts.get(termStr);
if (count == null) {
count = 0;
}
result.add(termStr, count);
}
}
}
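// facet.missing=true appends a final entry with a null key holding the number
// of matching docs that have no value for the field.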
if (missing) {
result.add(null, missingCount);
}
return result;
}
}