/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.io.Closeable;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import com.codahale.metrics.Gauge;
import com.google.common.collect.Iterables;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiPostingsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.*;
import org.apache.lucene.search.TotalHits.Relation;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.metrics.MetricsMap;
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.metrics.SolrMetricsContext;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.facet.UnInvertedField;
import org.apache.solr.search.join.GraphQuery;
import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.search.stats.StatsSource;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.update.IndexFingerprint;
import org.apache.solr.update.SolrIndexConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* SolrIndexSearcher adds schema awareness and caching functionality over {@link IndexSearcher}.
*
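 * <p>A typical way to obtain and release a searcher from a {@link SolrCore} (a sketch; {@code core} and
 * {@code query} are assumed to be in scope and error handling is omitted):
 * <pre>{@code
 * RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
 * try {
 *   SolrIndexSearcher searcher = searcherRef.get();
 *   DocList top = searcher.getDocList(query, (Query) null, null, 0, 10);
 * } finally {
 *   searcherRef.decref();
 * }
 * }</pre>
 *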
* @since solr 0.9
*/
public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrInfoBean {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String STATS_SOURCE = "org.apache.solr.stats_source";
public static final String STATISTICS_KEY = "searcher";
// These should *only* be used for debugging or monitoring purposes
public static final AtomicLong numOpens = new AtomicLong();
public static final AtomicLong numCloses = new AtomicLong();
@SuppressWarnings({"rawtypes"})
private static final Map<String,SolrCache> NO_GENERIC_CACHES = Collections.emptyMap();
@SuppressWarnings({"rawtypes"})
private static final SolrCache[] NO_CACHES = new SolrCache[0];
private final SolrCore core;
private final IndexSchema schema;
private final SolrDocumentFetcher docFetcher;
private final String name;
private final Date openTime = new Date();
private final long openNanoTime = System.nanoTime();
private Date registerTime;
private long warmupTime = 0;
private final DirectoryReader reader;
private final boolean closeReader;
private final int queryResultWindowSize;
private final int queryResultMaxDocsCached;
private final boolean useFilterForSortedQuery;
private final boolean cachingEnabled;
private final SolrCache<Query,DocSet> filterCache;
private final SolrCache<QueryResultKey,DocList> queryResultCache;
private final SolrCache<String,UnInvertedField> fieldValueCache;
// map of generic caches - not synchronized since it's read-only after the constructor.
@SuppressWarnings({"rawtypes"})
private final Map<String,SolrCache> cacheMap;
// list of all caches associated with this searcher.
@SuppressWarnings({"rawtypes"})
private final SolrCache[] cacheList;
private DirectoryFactory directoryFactory;
private final LeafReader leafReader;
// only for addIndexes etc (no fieldcache)
private final DirectoryReader rawReader;
private final String path;
private boolean releaseDirectory;
private final StatsCache statsCache;
private Set<String> metricNames = ConcurrentHashMap.newKeySet();
private SolrMetricsContext solrMetricsContext;
private static DirectoryReader getReader(SolrCore core, SolrIndexConfig config, DirectoryFactory directoryFactory,
String path) throws IOException {
final Directory dir = directoryFactory.get(path, DirContext.DEFAULT, config.lockType);
try {
return core.getIndexReaderFactory().newReader(dir, core);
} catch (Exception e) {
directoryFactory.release(dir);
throw new SolrException(ErrorCode.SERVER_ERROR, "Error opening Reader", e);
}
}
// TODO: wrap elsewhere and return a "map" from the schema that overrides get() ?
// this reader supports reopen
private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) throws IOException {
assert reader != null;
return ExitableDirectoryReader.wrap(
UninvertingReader.wrap(reader, core.getLatestSchema().getUninversionMapper()),
SolrQueryTimeoutImpl.getInstance());
}
/**
* Builds the necessary collector chain (via delegate wrapping) and executes the query against it. This method takes
* into consideration both the explicitly provided collector and postFilter as well as any needed collector wrappers
* for dealing with options specified in the QueryCommand.
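 * <p>Depending on which options in the {@link QueryCommand} apply, the chain nests roughly as follows
 * (outermost first, each wrapper present only when its option is enabled):
 * <pre>
 * postFilter -> TimeLimitingCollector -> EarlyTerminatingCollector
 *            -> EarlyTerminatingSortingCollector -> collector
 * </pre>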
* @return The collector used for search
*/
private Collector buildAndRunCollectorChain(QueryResult qr, Query query, Collector collector, QueryCommand cmd,
DelegatingCollector postFilter) throws IOException {
EarlyTerminatingSortingCollector earlyTerminatingSortingCollector = null;
if (cmd.getSegmentTerminateEarly()) {
final Sort cmdSort = cmd.getSort();
final int cmdLen = cmd.getLen();
final Sort mergeSort = core.getSolrCoreState().getMergePolicySort();
if (cmdSort == null || cmdLen <= 0 || mergeSort == null ||
!EarlyTerminatingSortingCollector.canEarlyTerminate(cmdSort, mergeSort)) {
log.warn("unsupported combination: segmentTerminateEarly=true cmdSort={} cmdLen={} mergeSort={}", cmdSort, cmdLen, mergeSort);
} else {
collector = earlyTerminatingSortingCollector = new EarlyTerminatingSortingCollector(collector, cmdSort, cmd.getLen());
}
}
final boolean terminateEarly = cmd.getTerminateEarly();
if (terminateEarly) {
collector = new EarlyTerminatingCollector(collector, cmd.getLen());
}
final long timeAllowed = cmd.getTimeAllowed();
if (timeAllowed > 0) {
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
}
if (postFilter != null) {
postFilter.setLastDelegate(collector);
collector = postFilter;
}
try {
super.search(query, collector);
} catch (TimeLimitingCollector.TimeExceededException | ExitableDirectoryReader.ExitingReaderException x) {
log.warn("Query: [{}]; ", query, x);
qr.setPartialResults(true);
} catch (EarlyTerminatingCollectorException etce) {
if (collector instanceof DelegatingCollector) {
((DelegatingCollector) collector).finish();
}
throw etce;
} finally {
if (earlyTerminatingSortingCollector != null) {
qr.setSegmentTerminatedEarly(earlyTerminatingSortingCollector.terminatedEarly());
}
}
if (collector instanceof DelegatingCollector) {
((DelegatingCollector) collector).finish();
}
return collector;
}
public SolrIndexSearcher(SolrCore core, String path, IndexSchema schema, SolrIndexConfig config, String name,
boolean enableCache, DirectoryFactory directoryFactory) throws IOException {
// We don't need to reserve the directory because we get it from the factory
this(core, path, schema, name, getReader(core, config, directoryFactory, path), true, enableCache, false,
directoryFactory);
// Release the directory at close.
this.releaseDirectory = true;
}
@SuppressWarnings({"unchecked", "rawtypes"})
public SolrIndexSearcher(SolrCore core, String path, IndexSchema schema, String name, DirectoryReader r,
boolean closeReader, boolean enableCache, boolean reserveDirectory, DirectoryFactory directoryFactory)
throws IOException {
super(wrapReader(core, r), core.getCoreContainer().getCollectorExecutor());
this.path = path;
this.directoryFactory = directoryFactory;
this.reader = (DirectoryReader) super.readerContext.reader();
this.rawReader = r;
this.leafReader = SlowCompositeReaderWrapper.wrap(this.reader);
this.core = core;
this.statsCache = core.createStatsCache();
this.schema = schema;
this.name = "Searcher@" + Integer.toHexString(hashCode()) + "[" + core.getName() + "]"
+ (name != null ? " " + name : "");
log.debug("Opening [{}]", this.name);
if (directoryFactory.searchersReserveCommitPoints()) {
// reserve commit point for life of searcher
// TODO: This may not be safe w/softCommit, see SOLR-13908
core.getDeletionPolicy().saveCommitPoint(reader.getIndexCommit().getGeneration());
}
if (reserveDirectory) {
// Keep the directory from being released while we use it.
directoryFactory.incRef(getIndexReader().directory());
// Make sure to release it when closing.
this.releaseDirectory = true;
}
this.closeReader = closeReader;
setSimilarity(schema.getSimilarity());
final SolrConfig solrConfig = core.getSolrConfig();
this.queryResultWindowSize = solrConfig.queryResultWindowSize;
this.queryResultMaxDocsCached = solrConfig.queryResultMaxDocsCached;
this.useFilterForSortedQuery = solrConfig.useFilterForSortedQuery;
this.docFetcher = new SolrDocumentFetcher(this, solrConfig, enableCache);
this.cachingEnabled = enableCache;
if (cachingEnabled) {
final ArrayList<SolrCache> clist = new ArrayList<>();
fieldValueCache = solrConfig.fieldValueCacheConfig == null ? null
: solrConfig.fieldValueCacheConfig.newInstance();
if (fieldValueCache != null) clist.add(fieldValueCache);
filterCache = solrConfig.filterCacheConfig == null ? null : solrConfig.filterCacheConfig.newInstance();
if (filterCache != null) clist.add(filterCache);
queryResultCache = solrConfig.queryResultCacheConfig == null ? null
: solrConfig.queryResultCacheConfig.newInstance();
if (queryResultCache != null) clist.add(queryResultCache);
SolrCache<Integer, Document> documentCache = docFetcher.getDocumentCache();
if (documentCache != null) clist.add(documentCache);
if (solrConfig.userCacheConfigs.isEmpty()) {
cacheMap = NO_GENERIC_CACHES;
} else {
cacheMap = new HashMap<>(solrConfig.userCacheConfigs.size());
for (Map.Entry<String,CacheConfig> e : solrConfig.userCacheConfigs.entrySet()) {
SolrCache cache = e.getValue().newInstance();
if (cache != null) {
cacheMap.put(cache.name(), cache);
clist.add(cache);
}
}
}
cacheList = clist.toArray(new SolrCache[clist.size()]);
} else {
this.filterCache = null;
this.queryResultCache = null;
this.fieldValueCache = null;
this.cacheMap = NO_GENERIC_CACHES;
this.cacheList = NO_CACHES;
}
// We already have our own filter cache
setQueryCache(null);
// do this at the end since an exception in the constructor means we won't close
numOpens.incrementAndGet();
assert ObjectReleaseTracker.track(this);
}
public SolrDocumentFetcher getDocFetcher() {
return docFetcher;
}
List<LeafReaderContext> getLeafContexts() {
return super.leafContexts;
}
public StatsCache getStatsCache() {
return statsCache;
}
public FieldInfos getFieldInfos() {
return leafReader.getFieldInfos();
}
/*
* Override these two methods to provide a way to use global collection stats.
*/
@Override
public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
final SolrRequestInfo reqInfo = SolrRequestInfo.getRequestInfo();
if (reqInfo != null) {
final StatsSource statsSrc = (StatsSource) reqInfo.getReq().getContext().get(STATS_SOURCE);
if (statsSrc != null) {
return statsSrc.termStatistics(this, term, docFreq, totalTermFreq);
}
}
return localTermStatistics(term, docFreq, totalTermFreq);
}
@Override
public CollectionStatistics collectionStatistics(String field) throws IOException {
final SolrRequestInfo reqInfo = SolrRequestInfo.getRequestInfo();
if (reqInfo != null) {
final StatsSource statsSrc = (StatsSource) reqInfo.getReq().getContext().get(STATS_SOURCE);
if (statsSrc != null) {
return statsSrc.collectionStatistics(this, field);
}
}
return localCollectionStatistics(field);
}
public TermStatistics localTermStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
return super.termStatistics(term, docFreq, totalTermFreq);
}
public CollectionStatistics localCollectionStatistics(String field) throws IOException {
// Could call super.collectionStatistics(field); but we can use a cached MultiTerms
assert field != null;
// SlowAtomicReader has a cache of MultiTerms
Terms terms = getSlowAtomicReader().terms(field);
if (terms == null) {
return null;
}
return new CollectionStatistics(field, reader.maxDoc(),
terms.getDocCount(), terms.getSumTotalTermFreq(), terms.getSumDocFreq());
}
public boolean isCachingEnabled() {
return cachingEnabled;
}
public String getPath() {
return path;
}
@Override
public String toString() {
return name + "{" + reader + "}";
}
public SolrCore getCore() {
return core;
}
public final int maxDoc() {
return reader.maxDoc();
}
public final int numDocs() {
return reader.numDocs();
}
public final int docFreq(Term term) throws IOException {
return reader.docFreq(term);
}
/**
 * Not recommended unless there is a specific need, since this method internally relies on {@link SlowCompositeReaderWrapper}.
 * Prefer {@link IndexSearcher#leafContexts} to get the sub readers instead.
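 * <p>For example, per-segment access can usually be done through the leaf contexts instead
 * (a sketch; {@code fieldName} is a placeholder):
 * <pre>{@code
 * for (LeafReaderContext leaf : searcher.getTopReaderContext().leaves()) {
 *   Terms terms = leaf.reader().terms(fieldName);
 *   // ... per-segment work ...
 * }
 * }</pre>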
*/
public final LeafReader getSlowAtomicReader() {
return leafReader;
}
/** Raw reader (no fieldcaches etc). Useful for operations like addIndexes */
public final DirectoryReader getRawReader() {
return rawReader;
}
@Override
public final DirectoryReader getIndexReader() {
assert reader == super.getIndexReader();
return reader;
}
/**
* Register sub-objects such as caches and our own metrics
*/
public void register() {
final Map<String,SolrInfoBean> infoRegistry = core.getInfoRegistry();
// register self
infoRegistry.put(STATISTICS_KEY, this);
infoRegistry.put(name, this);
for (@SuppressWarnings({"rawtypes"})SolrCache cache : cacheList) {
cache.setState(SolrCache.State.LIVE);
infoRegistry.put(cache.name(), cache);
}
this.solrMetricsContext = core.getSolrMetricsContext().getChildContext(this);
for (@SuppressWarnings({"rawtypes"})SolrCache cache : cacheList) {
cache.initializeMetrics(solrMetricsContext, SolrMetricManager.mkName(cache.name(), STATISTICS_KEY));
}
initializeMetrics(solrMetricsContext, STATISTICS_KEY);
registerTime = new Date();
}
/**
 * Frees resources associated with this searcher.
 *
 * In particular, the underlying reader and any caches in use are closed.
*/
@Override
public void close() throws IOException {
if (log.isDebugEnabled()) {
if (cachingEnabled) {
final StringBuilder sb = new StringBuilder();
sb.append("Closing ").append(name);
for (@SuppressWarnings({"rawtypes"})SolrCache cache : cacheList) {
sb.append("\n\t");
sb.append(cache);
}
log.debug("{}", sb);
} else {
log.debug("Closing [{}]", name);
}
}
core.getInfoRegistry().remove(name);
// super.close();
// can't use super.close() since it just calls reader.close() and that may only be called once
// per reader (even if incRef() was previously called).
long cpg = reader.getIndexCommit().getGeneration();
try {
if (closeReader) rawReader.decRef();
} catch (Exception e) {
SolrException.log(log, "Problem dec ref'ing reader", e);
}
if (directoryFactory.searchersReserveCommitPoints()) {
core.getDeletionPolicy().releaseCommitPoint(cpg);
}
for (@SuppressWarnings({"rawtypes"})SolrCache cache : cacheList) {
try {
cache.close();
} catch (Exception e) {
SolrException.log(log, "Exception closing cache " + cache.name(), e);
}
}
if (releaseDirectory) {
directoryFactory.release(getIndexReader().directory());
}
try {
SolrInfoBean.super.close();
} catch (Exception e) {
log.warn("Exception closing", e);
}
// do this at the end so it only gets done if there are no exceptions
numCloses.incrementAndGet();
assert ObjectReleaseTracker.release(this);
}
/** Direct access to the IndexSchema for use with this searcher */
public IndexSchema getSchema() {
return schema;
}
/**
* Returns a collection of all field names the index reader knows about.
*/
public Iterable<String> getFieldNames() {
return Iterables.transform(getFieldInfos(), fieldInfo -> fieldInfo.name);
}
public SolrCache<Query,DocSet> getFilterCache() {
return filterCache;
}
//
// Set default regenerators on filter and query caches if they don't have any
//
public static void initRegenerators(SolrConfig solrConfig) {
if (solrConfig.fieldValueCacheConfig != null && solrConfig.fieldValueCacheConfig.getRegenerator() == null) {
solrConfig.fieldValueCacheConfig.setRegenerator(new CacheRegenerator() {
@Override
public boolean regenerateItem(SolrIndexSearcher newSearcher,
@SuppressWarnings({"rawtypes"})SolrCache newCache,
@SuppressWarnings({"rawtypes"})SolrCache oldCache,
Object oldKey, Object oldVal) throws IOException {
if (oldVal instanceof UnInvertedField) {
UnInvertedField.getUnInvertedField((String) oldKey, newSearcher);
}
return true;
}
});
}
if (solrConfig.filterCacheConfig != null && solrConfig.filterCacheConfig.getRegenerator() == null) {
solrConfig.filterCacheConfig.setRegenerator(new CacheRegenerator() {
@Override
@SuppressWarnings({"rawtypes"})public boolean regenerateItem(SolrIndexSearcher newSearcher
, @SuppressWarnings({"rawtypes"})SolrCache newCache
, @SuppressWarnings({"rawtypes"})SolrCache oldCache,
Object oldKey, Object oldVal) throws IOException {
newSearcher.cacheDocSet((Query) oldKey, null, false);
return true;
}
});
}
if (solrConfig.queryResultCacheConfig != null && solrConfig.queryResultCacheConfig.getRegenerator() == null) {
final int queryResultWindowSize = solrConfig.queryResultWindowSize;
solrConfig.queryResultCacheConfig.setRegenerator(new CacheRegenerator() {
@Override
@SuppressWarnings({"rawtypes"})
public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache,
Object oldKey, Object oldVal) throws IOException {
QueryResultKey key = (QueryResultKey) oldKey;
int nDocs = 1;
// request 1 doc and let caching round up to the next window size...
// unless the window size is <=1, in which case we will pick
// the minimum of the number of documents requested last time and
// a reasonable number such as 40.
// TODO: make more configurable later...
if (queryResultWindowSize <= 1) {
DocList oldList = (DocList) oldVal;
int oldnDocs = oldList.offset() + oldList.size();
// 40 has factors of 2,4,5,10,20
nDocs = Math.min(oldnDocs, 40);
}
int flags = NO_CHECK_QCACHE | key.nc_flags;
QueryCommand qc = new QueryCommand();
qc.setQuery(key.query)
.setFilterList(key.filters)
.setSort(key.sort)
.setLen(nDocs)
.setSupersetMaxDoc(nDocs)
.setFlags(flags);
QueryResult qr = new QueryResult();
newSearcher.getDocListC(qr, qc);
return true;
}
});
}
}
public QueryResult search(QueryResult qr, QueryCommand cmd) throws IOException {
getDocListC(qr, cmd);
return qr;
}
// FIXME: This option has been dead/noop since 3.1, should we re-enable or remove it?
// public Hits search(Query query, Filter filter, Sort sort) throws IOException {
// // todo - when Solr starts accepting filters, need to
// // change this conditional check (filter!=null) and create a new filter
// // that ANDs them together if it already exists.
//
// if (optimizer==null || filter!=null || !(query instanceof BooleanQuery)
// ) {
// return super.search(query,filter,sort);
// } else {
// Query[] newQuery = new Query[1];
// Filter[] newFilter = new Filter[1];
// optimizer.optimize((BooleanQuery)query, this, 0, newQuery, newFilter);
//
// return super.search(newQuery[0], newFilter[0], sort);
// }
// }
/**
* Retrieve the {@link Document} instance corresponding to the document id.
*
* @see SolrDocumentFetcher
*/
@Override
public Document doc(int docId) throws IOException {
return doc(docId, (Set<String>) null);
}
/**
* Visit a document's fields using a {@link StoredFieldVisitor}.
* This method does not currently add to the Solr document cache.
*
* @see IndexReader#document(int, StoredFieldVisitor)
* @see SolrDocumentFetcher
*/
@Override
public final void doc(int docId, StoredFieldVisitor visitor) throws IOException {
getDocFetcher().doc(docId, visitor);
}
/**
* Retrieve the {@link Document} instance corresponding to the document id.
* <p>
* <b>NOTE</b>: the document will have all fields accessible, but if a field filter is provided, only the provided
* fields will be loaded (the remainder will be available lazily).
*
* @see SolrDocumentFetcher
*/
@Override
public final Document doc(int i, Set<String> fields) throws IOException {
return getDocFetcher().doc(i, fields);
}
/** expert: internal API, subject to change */
public SolrCache<String,UnInvertedField> getFieldValueCache() {
return fieldValueCache;
}
/** Returns a weighted sort according to this searcher */
public Sort weightSort(Sort sort) throws IOException {
return (sort != null) ? sort.rewrite(this) : null;
}
/** Returns a weighted sort spec according to this searcher */
public SortSpec weightSortSpec(SortSpec originalSortSpec, Sort nullEquivalent) throws IOException {
return implWeightSortSpec(
originalSortSpec.getSort(),
originalSortSpec.getCount(),
originalSortSpec.getOffset(),
nullEquivalent);
}
/** Returns a weighted sort spec according to this searcher */
private SortSpec implWeightSortSpec(Sort originalSort, int num, int offset, Sort nullEquivalent) throws IOException {
Sort rewrittenSort = weightSort(originalSort);
if (rewrittenSort == null) {
rewrittenSort = nullEquivalent;
}
final SortField[] rewrittenSortFields = rewrittenSort.getSort();
final SchemaField[] rewrittenSchemaFields = new SchemaField[rewrittenSortFields.length];
for (int ii = 0; ii < rewrittenSortFields.length; ++ii) {
final String fieldName = rewrittenSortFields[ii].getField();
rewrittenSchemaFields[ii] = (fieldName == null ? null : schema.getFieldOrNull(fieldName));
}
return new SortSpec(rewrittenSort, rewrittenSchemaFields, num, offset);
}
/**
 * Returns the first document number containing the term <code>t</code>, or -1 if no document was found. This
 * method is primarily intended for clients that want to fetch documents using a unique identifier.
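 * <p>For example (a sketch; "id" is assumed to be the unique key field and the value is a placeholder):
 * <pre>{@code
 * int docId = searcher.getFirstMatch(new Term("id", "someUniqueKeyValue"));
 * }</pre>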
*
* @return the first document number containing the term
*/
public int getFirstMatch(Term t) throws IOException {
long pair = lookupId(t.field(), t.bytes());
if (pair == -1) {
return -1;
} else {
final int segIndex = (int) (pair >> 32);
final int segDocId = (int) pair;
return leafContexts.get(segIndex).docBase + segDocId;
}
}
/**
* lookup the docid by the unique key field, and return the id *within* the leaf reader in the low 32 bits, and the
* index of the leaf reader in the high 32 bits. -1 is returned if not found.
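 * <p>The packed value can be decoded like this (mirroring {@link #getFirstMatch(Term)}):
 * <pre>{@code
 * long pair = searcher.lookupId(idBytes);
 * if (pair != -1) {
 *   int leafIndex = (int) (pair >> 32); // index into the leaf contexts
 *   int segDocId = (int) pair;          // docid local to that leaf reader
 * }
 * }</pre>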
*
* @lucene.internal
*/
public long lookupId(BytesRef idBytes) throws IOException {
return lookupId(schema.getUniqueKeyField().getName(), idBytes);
}
private long lookupId(String field, BytesRef idBytes) throws IOException {
for (int i = 0, c = leafContexts.size(); i < c; i++) {
final LeafReaderContext leaf = leafContexts.get(i);
final LeafReader reader = leaf.reader();
final Terms terms = reader.terms(field);
if (terms == null) continue;
TermsEnum te = terms.iterator();
if (te.seekExact(idBytes)) {
PostingsEnum docs = te.postings(null, PostingsEnum.NONE);
docs = BitsFilteredPostingsEnum.wrap(docs, reader.getLiveDocs());
int id = docs.nextDoc();
if (id == DocIdSetIterator.NO_MORE_DOCS) continue;
assert docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
return (((long) i) << 32) | id;
}
}
return -1;
}
/**
* Compute and cache the DocSet that matches a query. The normal usage is expected to be cacheDocSet(myQuery,
* null,false) meaning that Solr will determine if the Query warrants caching, and if so, will compute the DocSet that
* matches the Query and cache it. If the answer to the query is already cached, nothing further will be done.
* <p>
* If the optionalAnswer DocSet is provided, it should *not* be modified after this call.
*
* @param query
* the lucene query that will act as the key
* @param optionalAnswer
* the DocSet to be cached - if null, it will be computed.
* @param mustCache
* if true, a best effort will be made to cache this entry. if false, heuristics may be used to determine if
* it should be cached.
*/
public void cacheDocSet(Query query, DocSet optionalAnswer, boolean mustCache) throws IOException {
// Even if the cache is null, still compute the DocSet as it may serve to warm the Lucene
// or OS disk cache.
if (optionalAnswer != null) {
if (filterCache != null) {
filterCache.put(query, optionalAnswer);
}
return;
}
// Throw away the result, relying on the fact that getDocSet
// will currently always cache what it found. If getDocSet() starts
// using heuristics about what to cache, and mustCache==true, (or if we
// want this method to start using heuristics too) then
// this needs to change.
getDocSet(query);
}
private BitDocSet makeBitDocSet(DocSet answer) {
// TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
// or make DocSet instances remember maxDoc
if (answer instanceof BitDocSet) {
return (BitDocSet) answer;
}
FixedBitSet bs = new FixedBitSet(maxDoc());
DocIterator iter = answer.iterator();
while (iter.hasNext()) {
bs.set(iter.nextDoc());
}
return new BitDocSet(bs, answer.size());
}
public BitDocSet getDocSetBits(Query q) throws IOException {
DocSet answer = getDocSet(q);
BitDocSet answerBits = makeBitDocSet(answer);
if (answerBits != answer && filterCache != null) {
filterCache.put(q, answerBits);
}
return answerBits;
}
/**
* Returns the set of document ids matching a query. This method is cache-aware and attempts to retrieve the answer
* from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a result of
* this call. This method can handle negative queries.
* <p>
* The DocSet returned should <b>not</b> be modified.
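 * <p>For example (a sketch; {@code fq} and {@code otherQuery} are placeholder queries):
 * <pre>{@code
 * DocSet fqDocs = searcher.getDocSet(fq);
 * int overlap = fqDocs.intersectionSize(searcher.getDocSet(otherQuery));
 * }</pre>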
*/
public DocSet getDocSet(Query query) throws IOException {
if (query instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery) query;
if (!eq.getCache()) {
if (query instanceof WrappedQuery) {
query = ((WrappedQuery) query).getWrappedQuery();
}
query = QueryUtils.makeQueryable(query);
return getDocSetNC(query, null);
}
}
// Get the absolute value (positive version) of this query. If we
// get back the same reference, we know it's positive.
Query absQ = QueryUtils.getAbs(query);
boolean positive = query == absQ;
if (filterCache != null) {
DocSet absAnswer = filterCache.get(absQ);
if (absAnswer != null) {
if (positive) return absAnswer;
else return getLiveDocSet().andNot(absAnswer);
}
}
DocSet absAnswer = getDocSetNC(absQ, null);
DocSet answer = positive ? absAnswer : getLiveDocSet().andNot(absAnswer);
if (filterCache != null) {
// cache negative queries as positive
filterCache.put(absQ, absAnswer);
}
return answer;
}
// only handle positive (non negative) queries
DocSet getPositiveDocSet(Query q) throws IOException {
DocSet answer;
if (filterCache != null) {
answer = filterCache.get(q);
if (answer != null) return answer;
}
answer = getDocSetNC(q, null);
if (filterCache != null) filterCache.put(q, answer);
return answer;
}
private static Query matchAllDocsQuery = new MatchAllDocsQuery();
private volatile BitDocSet liveDocs;
@Deprecated // TODO remove for 8.0
public BitDocSet getLiveDocs() throws IOException {
return getLiveDocSet();
}
/**
* Returns an efficient random-access {@link DocSet} of the live docs. It's cached. Never null.
* @lucene.internal the type of DocSet returned may change in the future
*/
public BitDocSet getLiveDocSet() throws IOException {
// Going through the filter cache will provide thread safety here if we only had getLiveDocs,
// but the addition of setLiveDocs means we needed to add volatile to "liveDocs".
BitDocSet docs = liveDocs;
if (docs == null) {
//note: maybe should instead calc manually by segment, using FixedBitSet.copyOf(segLiveDocs); avoid filter cache?
liveDocs = docs = getDocSetBits(matchAllDocsQuery);
}
assert docs.size() == numDocs();
return docs;
}
/**
* Returns an efficient random-access {@link Bits} of the live docs. It's cached. Null means all docs are live.
* Use this like {@link LeafReader#getLiveDocs()}.
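 * <p>For example:
 * <pre>{@code
 * Bits live = searcher.getLiveDocsBits();
 * boolean isLive = (live == null) || live.get(docId); // null means no deletions
 * }</pre>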
* @lucene.internal
*/
//TODO rename to getLiveDocs in 8.0
public Bits getLiveDocsBits() throws IOException {
return getIndexReader().hasDeletions() ? getLiveDocSet().getBits() : null;
}
/** @lucene.internal */
public boolean isLiveDocsInstantiated() {
return liveDocs != null;
}
/** @lucene.internal */
public void setLiveDocs(DocSet docs) {
// a few places currently expect BitDocSet
assert docs.size() == numDocs();
this.liveDocs = makeBitDocSet(docs);
}
private static Comparator<Query> sortByCost = (q1, q2) -> ((ExtendedQuery) q1).getCost() - ((ExtendedQuery) q2).getCost();
/**
* Returns the set of document ids matching all queries. This method is cache-aware and attempts to retrieve the
* answer from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a
* result of this call. This method can handle negative queries.
* A null/empty list results in {@link #getLiveDocSet()}.
* <p>
* The DocSet returned should <b>not</b> be modified.
*/
public DocSet getDocSet(List<Query> queries) throws IOException {
ProcessedFilter pf = getProcessedFilter(null, queries);
if (pf.postFilter == null) {
if (pf.answer != null) {
return pf.answer;
} else if (pf.filter == null) {
return getLiveDocSet(); // note: this is what happens when queries is an empty list
}
}
DocSetCollector setCollector = new DocSetCollector(maxDoc());
Collector collector = setCollector;
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(collector);
collector = pf.postFilter;
}
Query query = pf.filter != null ? pf.filter : matchAllDocsQuery;
search(query, collector);
if (collector instanceof DelegatingCollector) {
((DelegatingCollector) collector).finish();
}
return DocSetUtil.getDocSet(setCollector, this);
}
/**
* INTERNAL: The response object from {@link #getProcessedFilter(DocSet, List)}.
* Holds a filter and postFilter pair that together match a set of documents.
* Either of them may be null, in which case the semantics are to match everything.
* @see #getProcessedFilter(DocSet, List)
*/
public static class ProcessedFilter {
public DocSet answer; // may be null. Sometimes we have a DocSet answer that represents the complete answer/result.
public Filter filter; // may be null
public DelegatingCollector postFilter; // may be null
}
/**
* INTERNAL: Processes conjunction (AND) of both args into a {@link ProcessedFilter} result.
 * Either arg may be null or empty, in which case it does not restrict the matching docs.
* Queries typically are resolved against the filter cache, and populate it.
*/
public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
ProcessedFilter pf = new ProcessedFilter();
if (queries == null || queries.size() == 0) {
if (setFilter != null) {
pf.answer = setFilter;
pf.filter = setFilter.getTopFilter();
}
return pf;
}
// We combine all the filter queries that come from the filter cache & setFilter into "answer".
// This might become pf.filterAsDocSet but not if there are any non-cached filters
DocSet answer = null;
boolean[] neg = new boolean[queries.size() + 1];
DocSet[] sets = new DocSet[queries.size() + 1];
List<Query> notCached = null;
List<Query> postFilters = null;
int end = 0;
int smallestIndex = -1;
if (setFilter != null) {
answer = sets[end++] = setFilter;
smallestIndex = end;
} // we are done with setFilter at this point
int smallestCount = Integer.MAX_VALUE;
for (Query q : queries) {
if (q instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery) q;
if (!eq.getCache()) {
if (eq.getCost() >= 100 && eq instanceof PostFilter) {
if (postFilters == null) postFilters = new ArrayList<>(sets.length - end);
postFilters.add(q);
} else {
if (notCached == null) notCached = new ArrayList<>(sets.length - end);
notCached.add(q);
}
continue;
}
}
if (filterCache == null) {
// there is no cache: don't pull bitsets
if (notCached == null) notCached = new ArrayList<>(sets.length - end);
WrappedQuery uncached = new WrappedQuery(q);
uncached.setCache(false);
notCached.add(uncached);
continue;
}
Query posQuery = QueryUtils.getAbs(q);
sets[end] = getPositiveDocSet(posQuery);
// Negative query if absolute value different from original
if (q == posQuery) {
neg[end] = false;
// keep track of the smallest positive set.
// This optimization is only worth it if size() is cached, which it would
// be if we don't do any set operations.
int sz = sets[end].size();
if (sz < smallestCount) {
smallestCount = sz;
smallestIndex = end;
answer = sets[end];
}
} else {
neg[end] = true;
}
end++;
}
// Are all of our normal cached filters negative?
if (end > 0 && answer == null) {
answer = getLiveDocSet();
}
// do negative queries first to shrink set size
for (int i = 0; i < end; i++) {
if (neg[i]) answer = answer.andNot(sets[i]);
}
for (int i = 0; i < end; i++) {
if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]);
}
// ignore "answer" if it simply matches all docs
if (answer != null && answer.size() == numDocs()) {
answer = null;
}
// answer is done.
// If no notCached nor postFilters, we can return now.
if (notCached == null && postFilters == null) {
// "answer" is the only part of the filter, so set it.
if (answer != null) {
pf.answer = answer;
pf.filter = answer.getTopFilter();
}
return pf;
}
// pf.answer will remain null ... (our local "answer" var is not the complete answer)
// Set pf.filter based on combining "answer" and "notCached"
if (notCached == null) {
if (answer != null) {
pf.filter = answer.getTopFilter();
}
} else {
Collections.sort(notCached, sortByCost);
List<Weight> weights = new ArrayList<>(notCached.size());
for (Query q : notCached) {
Query qq = QueryUtils.makeQueryable(q);
weights.add(createWeight(rewrite(qq), ScoreMode.COMPLETE_NO_SCORES, 1));
}
pf.filter = new FilterImpl(answer, weights);
}
// Set pf.postFilter
if (postFilters != null) {
Collections.sort(postFilters, sortByCost);
for (int i = postFilters.size() - 1; i >= 0; i--) {
DelegatingCollector prev = pf.postFilter;
pf.postFilter = ((PostFilter) postFilters.get(i)).getFilterCollector(this);
if (prev != null) pf.postFilter.setDelegate(prev);
}
}
return pf;
}
/** @lucene.internal */
public DocSet getDocSet(DocsEnumState deState) throws IOException {
int largestPossible = deState.termsEnum.docFreq();
boolean useCache = filterCache != null && largestPossible >= deState.minSetSizeCached;
TermQuery key = null;
if (useCache) {
key = new TermQuery(new Term(deState.fieldName, deState.termsEnum.term()));
DocSet result = filterCache.get(key);
if (result != null) return result;
}
int smallSetSize = DocSetUtil.smallSetSize(maxDoc());
int scratchSize = Math.min(smallSetSize, largestPossible);
if (deState.scratch == null || deState.scratch.length < scratchSize) deState.scratch = new int[scratchSize];
final int[] docs = deState.scratch;
int upto = 0;
int bitsSet = 0;
FixedBitSet fbs = null;
PostingsEnum postingsEnum = deState.termsEnum.postings(deState.postingsEnum, PostingsEnum.NONE);
postingsEnum = BitsFilteredPostingsEnum.wrap(postingsEnum, deState.liveDocs);
if (deState.postingsEnum == null) {
deState.postingsEnum = postingsEnum;
}
if (postingsEnum instanceof MultiPostingsEnum) {
MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
for (int subindex = 0; subindex < numSubs; subindex++) {
MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
if (sub.postingsEnum == null) continue;
int base = sub.slice.start;
int docid;
if (largestPossible > docs.length) {
if (fbs == null) fbs = new FixedBitSet(maxDoc());
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
fbs.set(docid + base);
bitsSet++;
}
} else {
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
docs[upto++] = docid + base;
}
}
}
} else {
int docid;
if (largestPossible > docs.length) {
fbs = new FixedBitSet(maxDoc());
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
fbs.set(docid);
bitsSet++;
}
} else {
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
docs[upto++] = docid;
}
}
}
DocSet result;
if (fbs != null) {
for (int i = 0; i < upto; i++) {
fbs.set(docs[i]);
}
bitsSet += upto;
result = new BitDocSet(fbs, bitsSet);
} else {
result = upto == 0 ? DocSet.empty() : new SortedIntDocSet(Arrays.copyOf(docs, upto));
}
if (useCache) {
filterCache.put(key, result);
}
return result;
}
// query must be positive
protected DocSet getDocSetNC(Query query, DocSet filter) throws IOException {
return DocSetUtil.createDocSet(this, query, filter);
}
/**
* Returns the set of document ids matching both the query and the filter. This method is cache-aware and attempts to
* retrieve the answer from the cache if possible. If the answer was not cached, it may have been inserted into the
* cache as a result of this call.
* <p>
*
* @param filter
* may be null
* @return DocSet meeting the specified criteria, should <b>not</b> be modified by the caller.
*/
public DocSet getDocSet(Query query, DocSet filter) throws IOException {
if (filter == null) return getDocSet(query);
if (query instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery) query;
if (!eq.getCache()) {
if (query instanceof WrappedQuery) {
query = ((WrappedQuery) query).getWrappedQuery();
}
query = QueryUtils.makeQueryable(query);
return getDocSetNC(query, filter);
}
}
// Negative query if absolute value different from original
Query absQ = QueryUtils.getAbs(query);
boolean positive = absQ == query;
DocSet first;
if (filterCache != null) {
first = filterCache.get(absQ);
if (first == null) {
first = getDocSetNC(absQ, null);
filterCache.put(absQ, first);
}
return positive ? first.intersection(filter) : filter.andNot(first);
}
// If there isn't a cache, then do a single filtered query if positive.
return positive ? getDocSetNC(absQ, filter) : filter.andNot(getPositiveDocSet(absQ));
}
/**
* Returns documents matching both <code>query</code> and <code>filter</code> and sorted by <code>sort</code>.
* <p>
* This method is cache aware and may retrieve <code>filter</code> from the cache or make an insertion into the cache
* as a result of this call.
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
*
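 * <p>For example, fetching the top ten matches by relevance (a sketch; {@code userQuery} and {@code fq}
 * are placeholders):
 * <pre>{@code
 * DocList top10 = searcher.getDocList(userQuery, fq, null, 0, 10); // null sort = relevance
 * }</pre>
 *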
* @param filter
* may be null
* @param lsort
* criteria by which to sort (if null, query relevance is used)
* @param offset
* offset into the list of documents to return
* @param len
* maximum number of documents to return
* @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
* If there is a low-level I/O error.
*/
public DocList getDocList(Query query, Query filter, Sort lsort, int offset, int len) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len);
QueryResult qr = new QueryResult();
search(qr, qc);
return qr.getDocList();
}
/**
* Returns documents matching both <code>query</code> and the intersection of the <code>filterList</code>, sorted by
* <code>sort</code>.
* <p>
* This method is cache aware and may retrieve <code>filter</code> from the cache or make an insertion into the cache
* as a result of this call.
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
*
* @param filterList
* may be null
* @param lsort
* criteria by which to sort (if null, query relevance is used)
* @param offset
* offset into the list of documents to return
* @param len
* maximum number of documents to return
* @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
* If there is a low-level I/O error.
*/
public DocList getDocList(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags)
throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filterList)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setFlags(flags);
QueryResult qr = new QueryResult();
search(qr, qc);
return qr.getDocList();
}
public static final int NO_CHECK_QCACHE = 0x80000000;
public static final int GET_DOCSET = 0x40000000;
static final int NO_CHECK_FILTERCACHE = 0x20000000;
static final int NO_SET_QCACHE = 0x10000000;
static final int SEGMENT_TERMINATE_EARLY = 0x08;
public static final int TERMINATE_EARLY = 0x04;
public static final int GET_DOCLIST = 0x02; // get the documents actually returned in a response
public static final int GET_SCORES = 0x01;
/**
 * getDocList version that uses and populates the query and filter caches. In the event of a timeout, the cache is not
* populated.
*/
private void getDocListC(QueryResult qr, QueryCommand cmd) throws IOException {
DocListAndSet out = new DocListAndSet();
qr.setDocListAndSet(out);
QueryResultKey key = null;
int maxDocRequested = cmd.getOffset() + cmd.getLen();
// check for overflow, and check for # docs in index
if (maxDocRequested < 0 || maxDocRequested > maxDoc()) maxDocRequested = maxDoc();
int supersetMaxDoc = maxDocRequested;
DocList superset = null;
int flags = cmd.getFlags();
Query q = cmd.getQuery();
if (q instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery) q;
if (!eq.getCache()) {
flags |= (NO_CHECK_QCACHE | NO_SET_QCACHE | NO_CHECK_FILTERCACHE);
}
}
// we can try and look up the complete query in the cache.
// we can't do that if filter!=null though (we don't want to
// do hashCode() and equals() for a big DocSet).
if (queryResultCache != null && cmd.getFilter() == null
&& (flags & (NO_CHECK_QCACHE | NO_SET_QCACHE)) != ((NO_CHECK_QCACHE | NO_SET_QCACHE))) {
// all of the current flags can be reused during warming,
// so set all of them on the cache key.
key = new QueryResultKey(q, cmd.getFilterList(), cmd.getSort(), flags, cmd.getMinExactCount());
if ((flags & NO_CHECK_QCACHE) == 0) {
superset = queryResultCache.get(key);
if (superset != null) {
// check that the cache entry has scores recorded if we need them
if ((flags & GET_SCORES) == 0 || superset.hasScores()) {
// NOTE: subset() returns null if the DocList has fewer docs than
// requested
out.docList = superset.subset(cmd.getOffset(), cmd.getLen());
}
}
if (out.docList != null) {
// found the docList in the cache... now check if we need the docset too.
// OPT: possible future optimization - if the doclist contains all the matches,
// use it to make the docset instead of rerunning the query.
if (out.docSet == null && ((flags & GET_DOCSET) != 0)) {
if (cmd.getFilterList() == null) {
out.docSet = getDocSet(cmd.getQuery());
} else {
List<Query> newList = new ArrayList<>(cmd.getFilterList().size() + 1);
newList.add(cmd.getQuery());
newList.addAll(cmd.getFilterList());
out.docSet = getDocSet(newList);
}
}
return;
}
}
// If we are going to generate the result, bump up to the
// next resultWindowSize for better caching.
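// e.g. with queryResultWindowSize=20, asking for 10 docs yields a superset of 20, and asking for 25 yields 40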
if ((flags & NO_SET_QCACHE) == 0) {
// handle 0 special case as well as avoid idiv in the common case.
if (maxDocRequested < queryResultWindowSize) {
supersetMaxDoc = queryResultWindowSize;
} else {
supersetMaxDoc = ((maxDocRequested - 1) / queryResultWindowSize + 1) * queryResultWindowSize;
if (supersetMaxDoc < 0) supersetMaxDoc = maxDocRequested;
}
} else {
key = null; // we won't be caching the result
}
}
cmd.setSupersetMaxDoc(supersetMaxDoc);
// OK, so now we need to generate an answer.
// One way to do that would be to check if we have an unordered list
// of results for the base query. If so, we can apply the filters and then
// sort by the resulting set. This can only be used if:
// - the sort doesn't contain score
// - we don't want score returned.
// check if we should try and use the filter cache
boolean useFilterCache = false;
if ((flags & (GET_SCORES | NO_CHECK_FILTERCACHE)) == 0 && useFilterForSortedQuery && cmd.getSort() != null
&& filterCache != null) {
useFilterCache = true;
SortField[] sfields = cmd.getSort().getSort();
for (SortField sf : sfields) {
if (sf.getType() == SortField.Type.SCORE) {
useFilterCache = false;
break;
}
}
}
if (useFilterCache) {
// now actually use the filter cache.
// for large filters that match few documents, this may be
// slower than simply re-executing the query.
if (out.docSet == null) {
out.docSet = getDocSet(cmd.getQuery(), cmd.getFilter());
List<Query> filterList = cmd.getFilterList();
if (filterList != null && !filterList.isEmpty()) {
out.docSet = out.docSet.intersection(getDocSet(cmd.getFilterList()));
}
}
// todo: there could be a sortDocSet that could take a list of
// the filters instead of anding them first...
// perhaps there should be a multi-docset-iterator
sortDocSet(qr, cmd);
} else {
// do it the normal way...
if ((flags & GET_DOCSET) != 0) {
// this currently conflates returning the docset for the base query vs
// the base query and all filters.
DocSet qDocSet = getDocListAndSetNC(qr, cmd);
// cache the docSet matching the query w/o filtering
if (qDocSet != null && filterCache != null && !qr.isPartialResults()) filterCache.put(cmd.getQuery(), qDocSet);
} else {
getDocListNC(qr, cmd);
}
assert null != out.docList : "docList is null";
}
if (null == cmd.getCursorMark()) {
// Kludge...
// we can't use DocSlice.subset, even though it should be an identity op
// because it gets confused by situations where there are lots of matches, but
// fewer docs in the slice than were requested (due to the cursor),
// so we have to short circuit the call.
// None of which is really a problem since we can't use caching with
// cursors anyway, but it still looks weird to have to special case this
// behavior based on this condition - hence the long explanation.
superset = out.docList;
out.docList = superset.subset(cmd.getOffset(), cmd.getLen());
} else {
// sanity check our cursor assumptions
assert null == superset : "cursor: superset isn't null";
assert 0 == cmd.getOffset() : "cursor: command offset mismatch";
assert 0 == out.docList.offset() : "cursor: docList offset mismatch";
assert cmd.getLen() >= supersetMaxDoc : "cursor: superset len mismatch: " + cmd.getLen() + " vs "
+ supersetMaxDoc;
}
// lastly, put the superset in the cache if the size is less than or equal
// to queryResultMaxDocsCached
if (key != null && superset.size() <= queryResultMaxDocsCached && !qr.isPartialResults()) {
queryResultCache.put(key, superset);
}
}
/**
* Helper method for extracting the {@link FieldDoc} sort values from a {@link TopFieldDocs} when available and making
* the appropriate call to {@link QueryResult#setNextCursorMark} when applicable.
*
* @param qr
* <code>QueryResult</code> to modify
* @param qc
* <code>QueryCommand</code> for context of method
* @param topDocs
* May or may not be a <code>TopFieldDocs</code>
*/
private void populateNextCursorMarkFromTopDocs(QueryResult qr, QueryCommand qc, TopDocs topDocs) {
// TODO: would be nice to rename & generalize this method for non-cursor cases...
// ...would be handy to reuse the ScoreDoc/FieldDoc sort vals directly in distrib sort
// ...but that has non-trivial queryResultCache implications
// See: SOLR-5595
if (null == qc.getCursorMark()) {
// nothing to do, short circuit out
return;
}
final CursorMark lastCursorMark = qc.getCursorMark();
// if we have a cursor, then we have a sort that at minimum involves uniqueKey..
// so we must have a TopFieldDocs containing FieldDoc[]
assert topDocs instanceof TopFieldDocs : "TopFieldDocs cursor constraint violated";
final TopFieldDocs topFieldDocs = (TopFieldDocs) topDocs;
final ScoreDoc[] scoreDocs = topFieldDocs.scoreDocs;
if (0 == scoreDocs.length) {
// no docs on this page, re-use existing cursor mark
qr.setNextCursorMark(lastCursorMark);
} else {
ScoreDoc lastDoc = scoreDocs[scoreDocs.length - 1];
assert lastDoc instanceof FieldDoc : "FieldDoc cursor constraint violated";
List<Object> lastFields = Arrays.<Object> asList(((FieldDoc) lastDoc).fields);
CursorMark nextCursorMark = lastCursorMark.createNext(lastFields);
assert null != nextCursorMark : "null nextCursorMark";
qr.setNextCursorMark(nextCursorMark);
}
}
/**
* Helper method for inspecting QueryCommand and creating the appropriate {@link TopDocsCollector}
*
* @param len
* the number of docs to return
* @param cmd
* The Command whose properties should determine the type of TopDocsCollector to use.
*/
@SuppressWarnings({"rawtypes"})
private TopDocsCollector buildTopDocsCollector(int len, QueryCommand cmd) throws IOException {
int minNumFound = cmd.getMinExactCount();
Query q = cmd.getQuery();
if (q instanceof RankQuery) {
RankQuery rq = (RankQuery) q;
return rq.getTopDocsCollector(len, cmd, this);
}
if (null == cmd.getSort()) {
assert null == cmd.getCursorMark() : "have cursor but no sort";
return TopScoreDocCollector.create(len, minNumFound);
} else {
// we have a sort
final Sort weightedSort = weightSort(cmd.getSort());
final CursorMark cursor = cmd.getCursorMark();
final FieldDoc searchAfter = (null != cursor ? cursor.getSearchAfterFieldDoc() : null);
return TopFieldCollector.create(weightedSort, len, searchAfter, minNumFound);
}
}
private void getDocListNC(QueryResult qr, QueryCommand cmd) throws IOException {
int len = cmd.getSupersetMaxDoc();
int last = len;
if (last < 0 || last > maxDoc()) last = maxDoc();
final int lastDocRequested = last;
int nDocsReturned;
int totalHits;
float maxScore;
int[] ids;
float[] scores;
boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
final Query query = QueryUtils.combineQueryAndFilter(QueryUtils.makeQueryable(cmd.getQuery()), pf.filter);
Relation hitsRelation;
// handle zero case...
if (lastDocRequested <= 0) {
final float[] topscore = new float[] {Float.NEGATIVE_INFINITY};
final int[] numHits = new int[1];
Collector collector;
if (!needScores) {
collector = new SimpleCollector() {
@Override
public void collect(int doc) {
numHits[0]++;
}
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE_NO_SCORES;
}
};
} else {
collector = new SimpleCollector() {
Scorable scorer;
@Override
public void setScorer(Scorable scorer) {
this.scorer = scorer;
}
@Override
public void collect(int doc) throws IOException {
numHits[0]++;
float score = scorer.score();
if (score > topscore[0]) topscore[0] = score;
}
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE;
}
};
}
buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);
nDocsReturned = 0;
ids = new int[nDocsReturned];
scores = new float[nDocsReturned];
totalHits = numHits[0];
maxScore = totalHits > 0 ? topscore[0] : 0.0f;
// no docs on this page, so cursor doesn't change
qr.setNextCursorMark(cmd.getCursorMark());
hitsRelation = Relation.EQUAL_TO;
} else {
TopDocs topDocs;
log.info("calling from 2, query: "+query.getClass()); // nocommit
if (pf.postFilter != null || cmd.getSegmentTerminateEarly() || cmd.getTimeAllowed() > 0
|| query instanceof RankQuery || query instanceof GraphQuery) {
log.debug("skipping collector manager");
final TopDocsCollector<?> topCollector = buildTopDocsCollector(len, cmd);
MaxScoreCollector maxScoreCollector = null;
Collector collector = topCollector;
if ((cmd.getFlags() & GET_SCORES) != 0) {
maxScoreCollector = new MaxScoreCollector();
collector = MultiCollector.wrap(topCollector, maxScoreCollector);
}
ScoreMode scoreModeUsed = buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter).scoreMode();
totalHits = topCollector.getTotalHits();
topDocs = topCollector.topDocs(0, len);
if (scoreModeUsed == ScoreMode.COMPLETE || scoreModeUsed == ScoreMode.COMPLETE_NO_SCORES) {
hitsRelation = TotalHits.Relation.EQUAL_TO;
} else {
hitsRelation = topDocs.totalHits.relation;
}
nDocsReturned = topDocs.scoreDocs.length;
maxScore = totalHits > 0 ? (maxScoreCollector == null ? Float.NaN : maxScoreCollector.getMaxScore()) : 0.0f;
} else {
log.debug("using collectormanager");
CollectorManagerResult result = searchCollectorManagers(len, cmd, query, true, true, false); // nocommit: need docset should be false
totalHits = result.totalHits;
maxScore = result.maxScore;
nDocsReturned = result.topDocs.scoreDocs.length;
topDocs = result.topDocs;
//TODO: Is this correct?
hitsRelation = topDocs.totalHits.relation;
}
if (cmd.getSort() != null && query instanceof RankQuery == false && (cmd.getFlags() & GET_SCORES) != 0) {
TopFieldCollector.populateScores(topDocs.scoreDocs, this, query);
}
populateNextCursorMarkFromTopDocs(qr, cmd, topDocs);
ids = new int[nDocsReturned];
scores = (cmd.getFlags() & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
for (int i = 0; i < nDocsReturned; i++) {
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
ids[i] = scoreDoc.doc;
if (scores != null) scores[i] = scoreDoc.score;
}
}
int sliceLen = Math.min(lastDocRequested, nDocsReturned);
if (sliceLen < 0) sliceLen = 0;
qr.setDocList(new DocSlice(0, sliceLen, ids, scores, totalHits, maxScore, hitsRelation));
}
CollectorManagerResult searchCollectorManagers(int len, QueryCommand cmd, Query query,
boolean needTopDocs, boolean needMaxScore, boolean needDocSet) throws IOException {
CollectorManager<MultiCollector, CollectorManagerResult> manager = new CollectorManager<MultiCollector, CollectorManagerResult>() {
@Override
public MultiCollector newCollector() throws IOException {
// TODO: DocSetCollector is not thread safe.
Collection<Collector> collectors = new ArrayList<Collector>();
if (needTopDocs) collectors.add(buildTopDocsCollector(len, cmd));
if (needMaxScore) collectors.add(new MaxScoreCollector());
if (needDocSet) collectors.add(new DocSetCollector(maxDoc()));
return (MultiCollector) MultiCollector.wrap(collectors);
}
@Override
public CollectorManagerResult reduce(Collection<MultiCollector> multiCollectors) throws IOException {
final TopDocs[] topDocs = new TopDocs[multiCollectors.size()];
float maxScore = 0.0f;
DocSet docSet = new BitDocSet(new FixedBitSet(maxDoc())); // TODO: if docset is not needed, avoid this initialization
int i = 0;
for (MultiCollector multiCollector: multiCollectors) {
int c = 0;
List<Collector> subCollectors = multiCollector.getCollectors();
TopDocsCollector topDocsCollector = needTopDocs? ((TopDocsCollector) subCollectors.get(c++)): null;
MaxScoreCollector maxScoreCollector = needMaxScore? ((MaxScoreCollector) subCollectors.get(c++)): null;
DocSetCollector docSetCollector = needDocSet? ((DocSetCollector) subCollectors.get(c++)): null;
if (needTopDocs) topDocs[i++] = topDocsCollector.topDocs(0, len);
if (needMaxScore) {
if (!Float.isNaN(maxScoreCollector.getMaxScore())) {
maxScore = Math.max(maxScore, maxScoreCollector.getMaxScore());
}
}
if (needDocSet) {
// docSet starts out non-null (see its initialization above), so union in each slice's
// collected docs rather than the original null check, which could never be true.
docSet = docSet.union(docSetCollector.getDocSet());
}
}
TopDocs mergedTopDocs;
if (topDocs != null && topDocs.length>0 && topDocs[0] instanceof TopFieldDocs) {
TopFieldDocs[] topFieldDocs = Arrays.copyOf(topDocs, topDocs.length, TopFieldDocs[].class);
mergedTopDocs = TopFieldDocs.merge(weightSort(cmd.getSort()), len, topFieldDocs);
} else {
mergedTopDocs = needTopDocs? TopDocs.merge(0, len, topDocs): null;
}
int totalHits = needTopDocs? (int)mergedTopDocs.totalHits.value: -1;
maxScore = totalHits > 0 ? maxScore : 0.0f;
return new CollectorManagerResult(mergedTopDocs, docSet, maxScore, totalHits);
}
};
CollectorManagerResult ret;
try {
ret = super.search(query, manager);
} catch (Exception ex) {
if (ex instanceof RuntimeException
&& ex.getCause() instanceof ExecutionException
&& ex.getCause().getCause() instanceof RuntimeException) {
throw (RuntimeException)ex.getCause().getCause();
} else {
throw ex;
}
}
return ret;
}
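/**
* Merged result of {@link #searchCollectorManagers}: the merged top hits, the collected
* {@link DocSet} (null when a DocSet was not requested), the maximum score, and the total
* hit count (-1 when top docs were not collected).
*/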
class CollectorManagerResult {
final TopDocs topDocs;
final DocSet docSet;
final float maxScore;
final int totalHits;
public CollectorManagerResult(TopDocs topDocs, DocSet docSet, float maxScore, int totalHits) {
this.topDocs = topDocs;
this.docSet = docSet;
this.maxScore = maxScore;
this.totalHits = totalHits;
}
}
// any DocSet returned is for the query only, without any filtering... that way it may
// be cached if desired.
private DocSet getDocListAndSetNC(QueryResult qr, QueryCommand cmd) throws IOException {
int len = cmd.getSupersetMaxDoc();
int last = len;
if (last < 0 || last > maxDoc()) last = maxDoc();
final int lastDocRequested = last;
int nDocsReturned;
int totalHits;
float maxScore;
int[] ids;
float[] scores;
DocSet set;
boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
int maxDoc = maxDoc();
cmd.setMinExactCount(Integer.MAX_VALUE); // we need the full DocSet
ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
final Query query = QueryUtils.combineQueryAndFilter(QueryUtils.makeQueryable(cmd.getQuery()), pf.filter);
// handle zero case...
if (lastDocRequested <= 0) {
final float[] topscore = new float[] {Float.NEGATIVE_INFINITY};
Collector collector;
final DocSetCollector setCollector = new DocSetCollector(maxDoc);
if (!needScores) {
collector = setCollector;
} else {
final Collector topScoreCollector = new SimpleCollector() {
Scorable scorer;
@Override
public void setScorer(Scorable scorer) throws IOException {
this.scorer = scorer;
}
@Override
public void collect(int doc) throws IOException {
float score = scorer.score();
if (score > topscore[0]) topscore[0] = score;
}
@Override
public ScoreMode scoreMode() {
return ScoreMode.TOP_SCORES;
}
};
collector = MultiCollector.wrap(setCollector, topScoreCollector);
}
log.info("calling from 3"); // nocommit
buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);
set = DocSetUtil.getDocSet(setCollector, this);
nDocsReturned = 0;
ids = new int[nDocsReturned];
scores = new float[nDocsReturned];
totalHits = set.size();
maxScore = totalHits > 0 ? topscore[0] : 0.0f;
// no docs on this page, so cursor doesn't change
qr.setNextCursorMark(cmd.getCursorMark());
} else {
TopDocs topDocs;
if (pf.postFilter != null || cmd.getSegmentTerminateEarly() || cmd.getTimeAllowed() > 0
|| query instanceof RankQuery || query instanceof GraphQuery) {
final TopDocsCollector topCollector = buildTopDocsCollector(len, cmd);
DocSetCollector setCollector = new DocSetCollector(maxDoc);
MaxScoreCollector maxScoreCollector = null;
List<Collector> collectors = new ArrayList<>(Arrays.asList(topCollector, setCollector));
if ((cmd.getFlags() & GET_SCORES) != 0) {
maxScoreCollector = new MaxScoreCollector();
collectors.add(maxScoreCollector);
}
// run the collector chain before reading results from the collectors
buildAndRunCollectorChain(qr, query, MultiCollector.wrap(collectors), cmd, pf.postFilter);
totalHits = topCollector.getTotalHits();
set = DocSetUtil.getDocSet(setCollector, this);
assert (totalHits == set.size()) || qr.isPartialResults();
topDocs = topCollector.topDocs(0, len);
maxScore = totalHits > 0 ? (maxScoreCollector == null ? Float.NaN : maxScoreCollector.getMaxScore()) : 0.0f;
} else {
log.debug("using collectormanager");
CollectorManagerResult result = searchCollectorManagers(len, cmd, query, true, true, true);
set = result.docSet;
totalHits = result.totalHits;
assert (totalHits == set.size()) || qr.isPartialResults();
topDocs = result.topDocs;
maxScore = result.maxScore;
}
if (cmd.getSort() != null && !(query instanceof RankQuery) && (cmd.getFlags() & GET_SCORES) != 0) {
TopFieldCollector.populateScores(topDocs.scoreDocs, this, query);
}
populateNextCursorMarkFromTopDocs(qr, cmd, topDocs);
nDocsReturned = topDocs.scoreDocs.length;
ids = new int[nDocsReturned];
scores = (cmd.getFlags() & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
for (int i = 0; i < nDocsReturned; i++) {
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
ids[i] = scoreDoc.doc;
if (scores != null) scores[i] = scoreDoc.score;
}
}
int sliceLen = Math.min(lastDocRequested, nDocsReturned);
if (sliceLen < 0) sliceLen = 0;
qr.setDocList(new DocSlice(0, sliceLen, ids, scores, totalHits, maxScore, TotalHits.Relation.EQUAL_TO));
// TODO: if we collect results before the filter, we just need to intersect with
// that filter to generate the DocSet for qr.setDocSet()
qr.setDocSet(set);
// TODO: currently we don't generate the DocSet for the base query,
// but the QueryDocSet == CompleteDocSet if filter==null.
return pf.filter == null && pf.postFilter == null ? qr.getDocSet() : null;
}
/**
* Returns documents matching both <code>query</code> and <code>filter</code> and sorted by <code>sort</code>. FUTURE:
* The returned DocList may be retrieved from a cache.
*
* @param filter
* may be null
* @param lsort
* criteria by which to sort (if null, query relevance is used)
* @param offset
* offset into the list of documents to return
* @param len
* maximum number of documents to return
* @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
* If there is a low-level I/O error.
*/
public DocList getDocList(Query query, DocSet filter, Sort lsort, int offset, int len) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilter(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len);
QueryResult qr = new QueryResult();
search(qr, qc);
return qr.getDocList();
}
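// Illustrative usage (a sketch only; "searcher", "q" and "sort" are assumed caller-side objects,
// not defined by this class):
//
//   DocList hits = searcher.getDocList(q, (DocSet) null, sort, 0, 10);
//   DocIterator it = hits.iterator();
//   while (it.hasNext()) {
//     int docId = it.nextDoc(); // internal Lucene doc id, only valid for this searcher
//   }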
/**
* Returns documents matching both <code>query</code> and <code>filter</code> and sorted by <code>sort</code>. Also
* returns the complete set of documents matching <code>query</code> and <code>filter</code> (regardless of
* <code>offset</code> and <code>len</code>).
* <p>
* This method is cache aware and may retrieve <code>filter</code> from the cache or make an insertion into the cache
* as a result of this call.
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
* <p>
* The DocList and DocSet returned should <b>not</b> be modified.
*
* @param filter
* may be null
* @param lsort
* criteria by which to sort (if null, query relevance is used)
* @param offset
* offset into the list of documents to return
* @param len
* maximum number of documents to return
* @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
* If there is a low-level I/O error.
*/
public DocListAndSet getDocListAndSet(Query query, Query filter, Sort lsort, int offset, int len) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr, qc);
return qr.getDocListAndSet();
}
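// Illustrative usage (a sketch only; "searcher", "q", "fq" and "sort" are assumed caller-side objects):
//
//   DocListAndSet res = searcher.getDocListAndSet(q, fq, sort, 0, 10);
//   DocList page = res.docList;   // only the requested window of hits
//   DocSet matches = res.docSet;  // all documents matching q and fq, e.g. for faceting
//   int numMatches = matches.size();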
/**
* Returns documents matching both <code>query</code> and <code>filter</code> and sorted by <code>sort</code>. Also
* returns the complete set of documents matching <code>query</code> and <code>filter</code> (regardless of
* <code>offset</code> and <code>len</code>).
* <p>
* This method is cache aware and may retrieve <code>filter</code> from the cache or make an insertion into the cache
* as a result of this call.
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
* <p>
* The DocList and DocSet returned should <b>not</b> be modified.
*
* @param filter
* may be null
* @param lsort
* criteria by which to sort (if null, query relevance is used)
* @param offset
* offset into the list of documents to return
* @param len
* maximum number of documents to return
* @param flags
* user supplied flags for the result set
* @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
* If there is a low-level I/O error.
*/
public DocListAndSet getDocListAndSet(Query query, Query filter, Sort lsort, int offset, int len, int flags)
throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setFlags(flags)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr, qc);
return qr.getDocListAndSet();
}
/**
* Returns documents matching both <code>query</code> and the intersection of <code>filterList</code>, sorted by
* <code>sort</code>. Also returns the complete set of documents matching <code>query</code> and <code>filter</code>
* (regardless of <code>offset</code> and <code>len</code>).
* <p>
* This method is cache aware and may retrieve <code>filter</code> from the cache or make an insertion into the cache
* as a result of this call.
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
* <p>
* The DocList and DocSet returned should <b>not</b> be modified.
*
* @param filterList
* may be null
* @param lsort
* criteria by which to sort (if null, query relevance is used)
* @param offset
* offset into the list of documents to return
* @param len
* maximum number of documents to return
* @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
* If there is a low-level I/O error.
*/
public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len)
throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filterList)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr, qc);
return qr.getDocListAndSet();
}
/**
* Returns documents matching both <code>query</code> and the intersection of <code>filterList</code>, sorted by
* <code>sort</code>. Also returns the complete set of documents matching <code>query</code> and <code>filter</code>
* (regardless of <code>offset</code> and <code>len</code>).
* <p>
* This method is cache aware and may retrieve <code>filter</code> from the cache or make an insertion into the cache
* as a result of this call.
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
* <p>
* The DocList and DocSet returned should <b>not</b> be modified.
*
* @param filterList
* may be null
* @param lsort
* criteria by which to sort (if null, query relevance is used)
* @param offset
* offset into the list of documents to return
* @param len
* maximum number of documents to return
* @param flags
* user supplied flags for the result set
* @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
* If there is a low-level I/O error.
*/
public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags)
throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filterList)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setFlags(flags)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr, qc);
return qr.getDocListAndSet();
}
/**
* Returns documents matching both <code>query</code> and <code>filter</code> and sorted by <code>sort</code>. Also
* returns the complete set of documents matching <code>query</code> and <code>filter</code> (regardless of
* <code>offset</code> and <code>len</code>).
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
*
* @param filter
* may be null
* @param lsort
* criteria by which to sort (if null, query relevance is used)
* @param offset
* offset into the list of documents to return
* @param len
* maximum number of documents to return
* @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
* If there is a low-level I/O error.
*/
public DocListAndSet getDocListAndSet(Query query, DocSet filter, Sort lsort, int offset, int len)
throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilter(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr, qc);
return qr.getDocListAndSet();
}
/**
* Returns documents matching both <code>query</code> and <code>filter</code> and sorted by <code>sort</code>. Also
* returns the complete set of documents matching <code>query</code> and <code>filter</code> (regardless of
* <code>offset</code> and <code>len</code>).
* <p>
* This method is cache aware and may make an insertion into the cache as a result of this call.
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
* <p>
* The DocList and DocSet returned should <b>not</b> be modified.
*
* @param filter
* may be null
* @param lsort
* criteria by which to sort (if null, query relevance is used)
* @param offset
* offset into the list of documents to return
* @param len
* maximum number of documents to return
* @param flags
* user supplied flags for the result set
* @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
* If there is a low-level I/O error.
*/
public DocListAndSet getDocListAndSet(Query query, DocSet filter, Sort lsort, int offset, int len, int flags)
throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilter(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setFlags(flags)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr, qc);
return qr.getDocListAndSet();
}
protected void sortDocSet(QueryResult qr, QueryCommand cmd) throws IOException {
DocSet set = qr.getDocListAndSet().docSet;
int nDocs = cmd.getSupersetMaxDoc();
if (nDocs == 0) {
// SOLR-2923
qr.getDocListAndSet().docList = new DocSlice(0, 0, new int[0], null, set.size(), 0f, TotalHits.Relation.EQUAL_TO);
qr.setNextCursorMark(cmd.getCursorMark());
return;
}
// bit of a hack to tell if a set is sorted - do it better in the future.
boolean inOrder = set instanceof BitDocSet || set instanceof SortedIntDocSet;
@SuppressWarnings({"rawtypes"})
TopDocsCollector topCollector = buildTopDocsCollector(nDocs, cmd);
DocIterator iter = set.iterator();
int base = 0;
int end = 0;
int readerIndex = 0;
LeafCollector leafCollector = null;
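// walk the (already ordered) doc ids, advancing to the leaf containing each doc and
// collecting with the id rebased to that leaf (global id - leaf.docBase)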
while (iter.hasNext()) {
int doc = iter.nextDoc();
while (doc >= end) {
LeafReaderContext leaf = leafContexts.get(readerIndex++);
base = leaf.docBase;
end = base + leaf.reader().maxDoc();
leafCollector = topCollector.getLeafCollector(leaf);
// we should never need to set the scorer given the settings for the collector
}
leafCollector.collect(doc - base);
}
TopDocs topDocs = topCollector.topDocs(0, nDocs);
int nDocsReturned = topDocs.scoreDocs.length;
int[] ids = new int[nDocsReturned];
for (int i = 0; i < nDocsReturned; i++) {
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
ids[i] = scoreDoc.doc;
}
assert topDocs.totalHits.relation == TotalHits.Relation.EQUAL_TO;
qr.getDocListAndSet().docList = new DocSlice(0, nDocsReturned, ids, null, topDocs.totalHits.value, 0.0f, topDocs.totalHits.relation);
populateNextCursorMarkFromTopDocs(qr, cmd, topDocs);
}
/**
* Returns the number of documents that match both <code>a</code> and <code>b</code>.
* <p>
* This method is cache-aware and may check as well as modify the cache.
*
* @return the number of documents in the intersection between <code>a</code> and <code>b</code>.
* @throws IOException
* If there is a low-level I/O error.
*/
public int numDocs(Query a, DocSet b) throws IOException {
if (b.size() == 0) {
return 0;
}
if (filterCache != null) {
// Negative query if absolute value different from original
Query absQ = QueryUtils.getAbs(a);
DocSet positiveA = getPositiveDocSet(absQ);
return a == absQ ? b.intersectionSize(positiveA) : b.andNotSize(positiveA);
} else {
// If there isn't a cache, then do a single filtered query
// NOTE: we cannot use FilteredQuery, because BitDocSet assumes it will never
// have deleted documents, but UninvertedField's doNegative has sets with deleted docs
TotalHitCountCollector collector = new TotalHitCountCollector();
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(QueryUtils.makeQueryable(a), BooleanClause.Occur.MUST);
bq.add(new ConstantScoreQuery(b.getTopFilter()), BooleanClause.Occur.MUST);
super.search(bq.build(), collector);
return collector.getTotalHits();
}
}
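// Illustrative usage (a sketch only; "searcher", "inStockQuery" and "categoryDocs" are assumed
// caller-side objects): count how many docs of an existing DocSet also match a query.
//
//   int inStockInCategory = searcher.numDocs(inStockQuery, categoryDocs);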
/** @lucene.internal */
public int numDocs(DocSet a, DocsEnumState deState) throws IOException {
// Negative query if absolute value different from original
return a.intersectionSize(getDocSet(deState));
}
public static class DocsEnumState {
public String fieldName; // currently interned for as long as lucene requires it
public TermsEnum termsEnum;
public Bits liveDocs;
public PostingsEnum postingsEnum;
public int minSetSizeCached;
public int[] scratch;
}
/**
* Returns the number of documents that match both <code>a</code> and <code>b</code>.
* <p>
* This method is cache-aware and may check as well as modify the cache.
*
* @return the number of documents in the intersection between <code>a</code> and <code>b</code>.
* @throws IOException
* If there is a low-level I/O error.
*/
public int numDocs(Query a, Query b) throws IOException {
Query absA = QueryUtils.getAbs(a);
Query absB = QueryUtils.getAbs(b);
DocSet positiveA = getPositiveDocSet(absA);
DocSet positiveB = getPositiveDocSet(absB);
// Negative query if absolute value different from original
if (a == absA) {
if (b == absB) return positiveA.intersectionSize(positiveB);
return positiveA.andNotSize(positiveB);
}
if (b == absB) return positiveB.andNotSize(positiveA);
// if both negative, we need to create a temp DocSet since we
// don't have a counting method that takes three.
DocSet all = getLiveDocSet();
// -a -b == *:*.andNot(a).andNotSize(b) == *:*.andNotSize(a.union(b))
// we use the last form since the intermediate DocSet should normally be smaller.
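// e.g. with hypothetical fields, a = -inStock:true and b = -featured:true computes
// liveDocs.andNotSize(docs(inStock:true).union(docs(featured:true)))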
return all.andNotSize(positiveA.union(positiveB));
}
/** @lucene.internal */
public boolean intersects(DocSet a, DocsEnumState deState) throws IOException {
return a.intersects(getDocSet(deState));
}
/**
* Warm this searcher based on an old one (primarily for auto-cache warming).
*/
@SuppressWarnings({"unchecked"})
public void warm(SolrIndexSearcher old) {
// Make sure this is first! filters can help queryResults execute!
long warmingStartTime = System.nanoTime();
// warm the caches in order...
ModifiableSolrParams params = new ModifiableSolrParams();
params.add("warming", "true");
for (int i = 0; i < cacheList.length; i++) {
if (log.isDebugEnabled()) {
log.debug("autowarming [{}] from [{}]\n\t{}", this, old, old.cacheList[i]);
}
final SolrQueryRequest req = new LocalSolrQueryRequest(core, params) {
@Override
public SolrIndexSearcher getSearcher() {
return SolrIndexSearcher.this;
}
@Override
public void close() {}
};
final SolrQueryResponse rsp = new SolrQueryResponse();
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
try {
cacheList[i].warm(this, old.cacheList[i]);
} finally {
try {
req.close();
} finally {
SolrRequestInfo.clearRequestInfo();
}
}
if (log.isDebugEnabled()) {
log.debug("autowarming result for [{}]\n\t{}", this, cacheList[i]);
}
}
warmupTime = TimeUnit.MILLISECONDS.convert(System.nanoTime() - warmingStartTime, TimeUnit.NANOSECONDS);
}
/**
* return the named generic cache
*/
@SuppressWarnings({"rawtypes"})
public SolrCache getCache(String cacheName) {
return cacheMap.get(cacheName);
}
/**
* lookup an entry in a generic cache
*/
@SuppressWarnings({"unchecked"})
public Object cacheLookup(String cacheName, Object key) {
@SuppressWarnings({"rawtypes"})
SolrCache cache = cacheMap.get(cacheName);
return cache == null ? null : cache.get(key);
}
/**
* insert an entry in a generic cache
*/
@SuppressWarnings({"unchecked"})
public Object cacheInsert(String cacheName, Object key, Object val) {
@SuppressWarnings({"rawtypes"})
SolrCache cache = cacheMap.get(cacheName);
return cache == null ? null : cache.put(key, val);
}
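// Illustrative usage (a sketch only; "searcher" is assumed, and "myCache" must be a generic
// user cache declared in solrconfig.xml, otherwise lookup/insert simply return null):
//
//   Object val = searcher.cacheLookup("myCache", key);
//   if (val == null) {
//     val = computeExpensiveValue(key); // hypothetical helper
//     searcher.cacheInsert("myCache", key, val);
//   }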
public Date getOpenTimeStamp() {
return openTime;
}
// public but primarily for test case usage
public long getOpenNanoTime() {
return openNanoTime;
}
@Override
public Explanation explain(Query query, int doc) throws IOException {
return super.explain(QueryUtils.makeQueryable(query), doc);
}
/**
* Gets a cached version of the IndexFingerprint for this searcher.
* @lucene.internal
*/
public IndexFingerprint getIndexFingerprint(long maxVersion) throws IOException {
final SolrIndexSearcher searcher = this;
final AtomicReference<IOException> exception = new AtomicReference<>();
try {
return searcher.getTopReaderContext().leaves().stream()
.map(ctx -> {
try {
return searcher.getCore().getIndexFingerprint(searcher, ctx, maxVersion);
} catch (IOException e) {
exception.set(e);
return null;
}
})
.filter(java.util.Objects::nonNull)
.reduce(new IndexFingerprint(maxVersion), IndexFingerprint::reduce);
} finally {
if (exception.get() != null) throw exception.get();
}
}
/////////////////////////////////////////////////////////////////////
// SolrInfoBean stuff: Statistics and Module Info
/////////////////////////////////////////////////////////////////////
@Override
public String getName() {
return SolrIndexSearcher.class.getName();
}
@Override
public String getDescription() {
return "index searcher";
}
@Override
public Category getCategory() {
return Category.CORE;
}
@Override
public SolrMetricsContext getSolrMetricsContext() {
return solrMetricsContext;
}
@Override
public void initializeMetrics(SolrMetricsContext parentContext, String scope) {
parentContext.gauge(() -> name, true, "searcherName", Category.SEARCHER.toString(), scope);
parentContext.gauge(() -> cachingEnabled, true, "caching", Category.SEARCHER.toString(), scope);
parentContext.gauge(() -> openTime, true, "openedAt", Category.SEARCHER.toString(), scope);
parentContext.gauge(() -> warmupTime, true, "warmupTime", Category.SEARCHER.toString(), scope);
parentContext.gauge(() -> registerTime, true, "registeredAt", Category.SEARCHER.toString(), scope);
// reader stats
parentContext.gauge(rgauge(-1, () -> reader.numDocs()), true, "numDocs", Category.SEARCHER.toString(), scope);
parentContext.gauge(rgauge(-1, () -> reader.maxDoc()), true, "maxDoc", Category.SEARCHER.toString(), scope);
parentContext.gauge(rgauge(-1, () -> reader.maxDoc() - reader.numDocs()), true, "deletedDocs", Category.SEARCHER.toString(), scope);
parentContext.gauge(rgauge(-1, () -> reader.toString()), true, "reader", Category.SEARCHER.toString(), scope);
parentContext.gauge(rgauge("", () -> reader.directory().toString()), true, "readerDir", Category.SEARCHER.toString(), scope);
parentContext.gauge(rgauge(-1, () -> reader.getVersion()), true, "indexVersion", Category.SEARCHER.toString(), scope);
// size of the currently opened commit
parentContext.gauge(() -> {
try {
Collection<String> files = reader.getIndexCommit().getFileNames();
long total = 0;
for (String file : files) {
total += DirectoryFactory.sizeOf(reader.directory(), file);
}
return total;
} catch (Exception e) {
return -1;
}
}, true, "indexCommitSize", Category.SEARCHER.toString(), scope);
// statsCache metrics
parentContext.gauge(
new MetricsMap((detailed, map) -> {
statsCache.getCacheMetrics().getSnapshot(map::put);
map.put("statsCacheImpl", statsCache.getClass().getSimpleName());
}), true, "statsCache", Category.CACHE.toString(), scope);
}
/**
* wraps a gauge (related to an IndexReader) and swallows any {@link AlreadyClosedException} that
* might be thrown, returning the specified default in its place.
*/
private <T> Gauge<T> rgauge(T closedDefault, Gauge<T> g) {
return () -> {
try {
return g.getValue();
} catch (AlreadyClosedException ignore) {
return closedDefault;
}
};
}
private static class FilterImpl extends Filter {
private final Filter topFilter;
private final List<Weight> weights;
public FilterImpl(DocSet filter, List<Weight> weights) {
this.weights = weights;
this.topFilter = filter == null ? null : filter.getTopFilter();
}
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
final DocIdSet sub = topFilter == null ? null : topFilter.getDocIdSet(context, acceptDocs);
if (weights.size() == 0) return sub;
return new FilterSet(sub, context);
}
@Override
public String toString(String field) {
return "SolrFilter";
}
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
private class FilterSet extends DocIdSet {
private final DocIdSet docIdSet;
private final LeafReaderContext context;
public FilterSet(DocIdSet docIdSet, LeafReaderContext context) {
this.docIdSet = docIdSet;
this.context = context;
}
@Override
public DocIdSetIterator iterator() throws IOException {
List<DocIdSetIterator> iterators = new ArrayList<>(weights.size() + 1);
if (docIdSet != null) {
final DocIdSetIterator iter = docIdSet.iterator();
if (iter == null) return null;
iterators.add(iter);
}
for (Weight w : weights) {
final Scorer scorer = w.scorer(context);
if (scorer == null) return null;
iterators.add(scorer.iterator());
}
if (iterators.isEmpty()) return null;
if (iterators.size() == 1) return iterators.get(0);
if (iterators.size() == 2) return new DualFilterIterator(iterators.get(0), iterators.get(1));
return new FilterIterator(iterators.toArray(new DocIdSetIterator[iterators.size()]));
}
@Override
public Bits bits() throws IOException {
return null; // don't use random access
}
@Override
public long ramBytesUsed() {
return docIdSet != null ? docIdSet.ramBytesUsed() : 0L;
}
}
private static class FilterIterator extends DocIdSetIterator {
private final DocIdSetIterator[] iterators;
private final DocIdSetIterator first;
public FilterIterator(DocIdSetIterator[] iterators) {
this.iterators = iterators;
this.first = iterators[0];
}
@Override
public int docID() {
return first.docID();
}
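// Leapfrog-style conjunction over all iterators: starting from the first iterator's current
// doc, advance every other iterator to it; whenever one overshoots, that doc becomes the new
// target and the pass restarts. Returns the first doc id on which all iterators agree.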
private int advanceAllTo(int doc) throws IOException {
int highestDocIter = 0; // index of the iterator with the highest id
int i = 1; // We already advanced the first iterator before calling this method
while (i < iterators.length) {
if (i != highestDocIter) {
final int next = iterators[i].advance(doc);
if (next != doc) { // We need to advance all iterators to a new target
doc = next;
highestDocIter = i;
i = 0;
continue;
}
}
++i;
}
return doc;
}
@Override
public int nextDoc() throws IOException {
return advanceAllTo(first.nextDoc());
}
@Override
public int advance(int target) throws IOException {
return advanceAllTo(first.advance(target));
}
@Override
public long cost() {
return first.cost();
}
}
private static class DualFilterIterator extends DocIdSetIterator {
private final DocIdSetIterator a;
private final DocIdSetIterator b;
public DualFilterIterator(DocIdSetIterator a, DocIdSetIterator b) {
this.a = a;
this.b = b;
}
@Override
public int docID() {
return a.docID();
}
@Override
public int nextDoc() throws IOException {
return doNext(a.nextDoc());
}
@Override
public int advance(int target) throws IOException {
return doNext(a.advance(target));
}
@Override
public long cost() {
return Math.min(a.cost(), b.cost());
}
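// Two-way leapfrog: alternately advance each iterator to the other's current doc until both
// land on the same id (NO_MORE_DOCS included), which is then returned.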
private int doNext(int doc) throws IOException {
for (;;) {
int other = b.advance(doc);
if (other == doc) return doc;
doc = a.advance(other);
if (other == doc) return doc;
}
}
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
equalsTo(getClass().cast(other));
}
private boolean equalsTo(FilterImpl other) {
return Objects.equals(this.topFilter, other.topFilter) &&
Objects.equals(this.weights, other.weights);
}
@Override
public int hashCode() {
return classHash()
+ 31 * Objects.hashCode(topFilter)
+ 31 * Objects.hashCode(weights);
}
}
public long getWarmupTime() {
return warmupTime;
}
}