blob: 002b19011b8a14841e68cbeb620c9d8a923694ef [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.stats;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import com.google.common.collect.Lists;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermStatistics;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * This class implements exact caching of statistics. It requires an additional
 * round-trip to parse the query at shard servers, and return term statistics for
 * query terms (and collection statistics for term fields).
 *
 * <p>All intermediate state handled here (per-shard statistics, merged global
 * statistics) is stored in the context map of the {@link SolrQueryRequest} being
 * processed, under the private keys declared in this class; the class itself
 * declares no instance fields.
 */
public class ExactStatsCache extends StatsCache {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  // experimenting with strategy that takes more RAM, but also doesn't share memory
  // across threads
  private static final String CURRENT_GLOBAL_COL_STATS = "org.apache.solr.stats.currentGlobalColStats";
  private static final String CURRENT_GLOBAL_TERM_STATS = "org.apache.solr.stats.currentGlobalTermStats";
  private static final String PER_SHARD_TERM_STATS = "org.apache.solr.stats.perShardTermStats";
  private static final String PER_SHARD_COL_STATS = "org.apache.solr.stats.perShardColStats";

  /**
   * Reads the map stored under {@code key} in the request context.
   *
   * <p>The context is untyped, so the cast is unchecked; callers must only use keys
   * under which this class stored a map of the expected value type.
   *
   * @return the stored map, or {@code null} if nothing is stored under {@code key}
   */
  @SuppressWarnings("unchecked")
  private static <V> Map<String,V> contextMap(SolrQueryRequest req, String key) {
    return (Map<String,V>) req.getContext().get(key);
  }

  /** Like {@link #contextMap}, but substitutes an immutable empty map when none is stored. */
  private static <V> Map<String,V> contextMapOrEmpty(SolrQueryRequest req, String key) {
    Map<String,V> stored = contextMap(req, key);
    return (stored != null) ? stored : Collections.<String,V>emptyMap();
  }

  /** Like {@link #contextMap}, but creates and stores a new {@link HashMap} when none is stored. */
  private static <V> Map<String,V> contextMapOrCreate(SolrQueryRequest req, String key) {
    Map<String,V> stored = contextMap(req, key);
    if (stored == null) {
      stored = new HashMap<>();
      req.getContext().put(key, stored);
    }
    return stored;
  }

  /**
   * Builds a {@link StatsSource} backed by the global term and collection statistics
   * currently stored in the request context (see {@link #receiveGlobalStats}).
   * Missing maps are treated as empty, in which case the returned source falls back
   * to local statistics for every lookup.
   *
   * @param req the request whose context holds the global statistics
   * @return a new {@link ExactStatsSource}; never {@code null}
   */
  @Override
  public StatsSource get(SolrQueryRequest req) {
    Map<String,CollectionStats> currentGlobalColStats = contextMapOrEmpty(req, CURRENT_GLOBAL_COL_STATS);
    Map<String,TermStats> currentGlobalTermStats = contextMapOrEmpty(req, CURRENT_GLOBAL_TERM_STATS);
    log.debug("Returning StatsSource. Collection stats={}, Term stats size= {}", currentGlobalColStats, currentGlobalTermStats.size());
    return new ExactStatsSource(currentGlobalTermStats, currentGlobalColStats);
  }

  /** No configuration is read from the plugin info. */
  @Override
  public void init(PluginInfo info) {}

  /**
   * Creates the shard request used to fetch term statistics from the shards.
   *
   * @param rb the response builder of the current request
   * @return a request with purpose {@link ShardRequest#PURPOSE_GET_TERM_STATS} carrying a
   *         copy of the original request parameters, minus the {@code shards} parameter
   */
  @Override
  public ShardRequest retrieveStatsRequest(ResponseBuilder rb) {
    ShardRequest sreq = new ShardRequest();
    sreq.purpose = ShardRequest.PURPOSE_GET_TERM_STATS;
    sreq.params = new ModifiableSolrParams(rb.req.getParams());
    // don't pass through any shards param
    sreq.params.remove(ShardParams.SHARDS);
    return sreq;
  }

  /**
   * Records the statistics returned by each shard in the request context, keyed by
   * shard, and stores the union of all reported terms under {@code TERMS_KEY}.
   *
   * <p>A shard whose response (or response body) is missing is skipped instead of
   * failing the whole merge.
   *
   * @param req       the request whose context receives the per-shard statistics
   * @param responses the shard responses to the request built by {@link #retrieveStatsRequest}
   */
  @Override
  public void mergeToGlobalStats(SolrQueryRequest req, List<ShardResponse> responses) {
    Set<Object> allTerms = new HashSet<>();
    for (ShardResponse r : responses) {
      String shard = r.getShard();
      SolrResponse res = r.getSolrResponse();
      NamedList<Object> nl = (res == null) ? null : res.getResponse();
      if (nl == null) {
        // Not all shards responded (e.g. no server hosting the shard): there is
        // nothing to merge for this one, so move on rather than throwing an NPE.
        log.debug("No stats response from shard={}, skipping", shard);
        continue;
      }
      log.debug("Merging to global stats, shard={}, response={}", shard, nl);
      String termStatsString = (String) nl.get(TERM_STATS_KEY);
      if (termStatsString != null) {
        addToPerShardTermStats(req, shard, termStatsString);
      }
      List<Object> terms = nl.getAll(TERMS_KEY);
      allTerms.addAll(terms);
      String colStatsString = (String) nl.get(COL_STATS_KEY);
      Map<String,CollectionStats> colStats = StatsUtil.colStatsMapFromString(colStatsString);
      if (colStats != null) {
        addToPerShardColStats(req, shard, colStats);
      }
    }
    if (!allTerms.isEmpty()) {
      req.getContext().put(TERMS_KEY, Lists.newArrayList(allTerms));
    }
    if (log.isDebugEnabled()) {
      printStats(req);
    }
  }

  /**
   * Stores the collection statistics reported by {@code shard} in the request context,
   * replacing any statistics previously stored for that shard.
   */
  protected void addToPerShardColStats(SolrQueryRequest req, String shard, Map<String,CollectionStats> colStats) {
    Map<String,Map<String,CollectionStats>> perShardColStats = contextMapOrCreate(req, PER_SHARD_COL_STATS);
    perShardColStats.put(shard, colStats);
  }

  /** Logs, at debug level, the per-shard collection and term statistics held in the request context. */
  protected void printStats(SolrQueryRequest req) {
    Map<String,Map<String,TermStats>> perShardTermStats = contextMapOrEmpty(req, PER_SHARD_TERM_STATS);
    Map<String,Map<String,CollectionStats>> perShardColStats = contextMapOrEmpty(req, PER_SHARD_COL_STATS);
    log.debug("perShardColStats={}, perShardTermStats={}", perShardColStats, perShardTermStats);
  }

  /**
   * Parses {@code termStatsString} and, if it yields a map, stores it in the request
   * context as the term statistics of {@code shard}, replacing any previous entry.
   */
  protected void addToPerShardTermStats(SolrQueryRequest req, String shard, String termStatsString) {
    Map<String,TermStats> termStats = StatsUtil.termStatsMapFromString(termStatsString);
    if (termStats != null) {
      Map<String,Map<String,TermStats>> perShardTermStats = contextMapOrCreate(req, PER_SHARD_TERM_STATS);
      perShardTermStats.put(shard, termStats);
    }
  }

  /**
   * Collects the local statistics relevant to the current query and adds them to the
   * response: every term under {@code TERMS_KEY}, and the serialized term and
   * collection statistics under {@code TERM_STATS_KEY} / {@code COL_STATS_KEY}
   * (each only when non-empty).
   *
   * @param rb       the response builder holding the query and the response to fill
   * @param searcher the local searcher whose index reader supplies the statistics
   * @throws SolrException with {@link ErrorCode#SERVER_ERROR} if an {@link IOException}
   *         occurs while collecting
   */
  @Override
  public void returnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) {
    Query q = rb.getQuery();
    try {
      Set<Term> terms = new HashSet<>();
      Map<String,TermStats> statsMap = new HashMap<>();
      Map<String,CollectionStats> colMap = new HashMap<>();
      // Searcher that records every statistics lookup made through it.
      IndexSearcher statsCollectingSearcher = new IndexSearcher(searcher.getIndexReader()) {
        @Override
        public CollectionStatistics collectionStatistics(String field) throws IOException {
          CollectionStatistics cs = super.collectionStatistics(field);
          if (cs != null) {
            colMap.put(field, new CollectionStats(cs));
          }
          return cs;
        }

        @Override
        public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
          TermStatistics ts = super.termStatistics(term, docFreq, totalTermFreq);
          terms.add(term);
          statsMap.put(term.toString(), new TermStats(term.field(), ts));
          return ts;
        }
      };
      // The weight itself is discarded; it is created only so that the overridden
      // statistics methods above get a chance to record what the query asks for.
      statsCollectingSearcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
      for (Term t : terms) {
        rb.rsp.add(TERMS_KEY, t.toString());
      }
      if (!statsMap.isEmpty()) { // Don't add empty keys
        String termStatsString = StatsUtil.termStatsMapToString(statsMap);
        rb.rsp.add(TERM_STATS_KEY, termStatsString);
        if (log.isDebugEnabled()) {
          log.debug("termStats={}, terms={}, numDocs={}", termStatsString, terms, searcher.maxDoc());
        }
      }
      if (!colMap.isEmpty()) {
        String colStatsString = StatsUtil.colStatsMapToString(colMap);
        rb.rsp.add(COL_STATS_KEY, colStatsString);
        if (log.isDebugEnabled()) {
          log.debug("collectionStats={}, terms={}, numDocs={}", colStatsString, terms, searcher.maxDoc());
        }
      }
    } catch (IOException e) {
      log.error("Error collecting local stats, query='{}'", q, e);
      throw new SolrException(ErrorCode.SERVER_ERROR, "Error collecting local stats.", e);
    }
  }

  /**
   * Aggregates the per-shard statistics stored in the request context into global
   * statistics and attaches them to {@code outgoing} as request parameters.
   *
   * <p>The purpose of {@code outgoing} always gains
   * {@link ShardRequest#PURPOSE_SET_TERM_STATS}; parameters are only added when a term
   * list is present in the request context.
   *
   * @param rb       the response builder giving access to the request context and shard list
   * @param outgoing the shard request to decorate
   */
  @Override
  public void sendGlobalStats(ResponseBuilder rb, ShardRequest outgoing) {
    outgoing.purpose |= ShardRequest.PURPOSE_SET_TERM_STATS;
    ModifiableSolrParams params = outgoing.params;
    @SuppressWarnings("unchecked") // stored by mergeToGlobalStats from the shards' term strings
    List<String> terms = (List<String>) rb.req.getContext().get(TERMS_KEY);
    if (terms == null) {
      return;
    }
    // The field of a term is everything before the first ':' of its string form.
    Set<String> fields = new HashSet<>();
    for (String t : terms) {
      int sep = t.indexOf(':');
      fields.add(sep < 0 ? t : t.substring(0, sep));
    }
    Map<String,TermStats> globalTermStats = new HashMap<>();
    Map<String,CollectionStats> globalColStats = new HashMap<>();
    // aggregate collection stats, only for the field in terms
    for (String shard : rb.shards) {
      Map<String,CollectionStats> shardColStats = getPerShardColStats(rb, shard);
      if (shardColStats == null) {
        continue;
      }
      for (Entry<String,CollectionStats> e : shardColStats.entrySet()) {
        if (!fields.contains(e.getKey())) { // skip non-relevant fields
          continue;
        }
        globalColStats.computeIfAbsent(e.getKey(), field -> new CollectionStats(field)).add(e.getValue());
      }
    }
    params.add(COL_STATS_KEY, StatsUtil.colStatsMapToString(globalColStats));
    // sum up only from relevant shards
    for (String t : terms) {
      params.add(TERMS_KEY, t);
      for (String shard : rb.shards) {
        TermStats termStats = getPerShardTermStats(rb.req, t, shard);
        if (termStats == null || termStats.docFreq == 0) {
          continue;
        }
        globalTermStats.computeIfAbsent(t, term -> new TermStats(term)).add(termStats);
      }
    }
    log.debug("terms={}, termStats={}", terms, globalTermStats);
    // need global TermStats here...
    params.add(TERM_STATS_KEY, StatsUtil.termStatsMapToString(globalTermStats));
  }

  /**
   * @return the collection statistics stored in the request context for {@code shard},
   *         or {@code null} if none are stored
   */
  protected Map<String,CollectionStats> getPerShardColStats(ResponseBuilder rb, String shard) {
    Map<String,Map<String,CollectionStats>> perShardColStats = contextMapOrEmpty(rb.req, PER_SHARD_COL_STATS);
    return perShardColStats.get(shard);
  }

  /**
   * @return the statistics stored in the request context for term {@code t} on
   *         {@code shard}, or {@code null} if the shard or the term is unknown
   */
  protected TermStats getPerShardTermStats(SolrQueryRequest req, String t, String shard) {
    Map<String,Map<String,TermStats>> perShardTermStats = contextMapOrEmpty(req, PER_SHARD_TERM_STATS);
    Map<String,TermStats> cache = perShardTermStats.get(shard);
    return (cache != null) ? cache.get(t) : null; // Term doesn't exist in shard
  }

  /**
   * Reads the global statistics from the {@code COL_STATS_KEY} and
   * {@code TERM_STATS_KEY} request parameters, if present, and stores them in the
   * request context, where {@link #get} picks them up.
   *
   * @param req the request carrying the serialized global statistics
   */
  @Override
  public void receiveGlobalStats(SolrQueryRequest req) {
    String globalTermStats = req.getParams().get(TERM_STATS_KEY);
    String globalColStats = req.getParams().get(COL_STATS_KEY);
    if (globalColStats != null) {
      Map<String,CollectionStats> colStats = StatsUtil.colStatsMapFromString(globalColStats);
      if (colStats != null) {
        for (Entry<String,CollectionStats> e : colStats.entrySet()) {
          addToGlobalColStats(req, e);
        }
      }
    }
    log.debug("Global collection stats={}", globalColStats);
    if (globalTermStats == null) {
      return;
    }
    Map<String,TermStats> termStats = StatsUtil.termStatsMapFromString(globalTermStats);
    if (termStats != null) {
      for (Entry<String,TermStats> e : termStats.entrySet()) {
        addToGlobalTermStats(req, e);
      }
    }
  }

  /** Stores one field's global collection statistics in the request context, replacing any previous entry. */
  protected void addToGlobalColStats(SolrQueryRequest req,
                                     Entry<String,CollectionStats> e) {
    Map<String,CollectionStats> currentGlobalColStats = contextMapOrCreate(req, CURRENT_GLOBAL_COL_STATS);
    currentGlobalColStats.put(e.getKey(), e.getValue());
  }

  /** Stores one term's global statistics in the request context, replacing any previous entry. */
  protected void addToGlobalTermStats(SolrQueryRequest req, Entry<String,TermStats> e) {
    Map<String,TermStats> currentGlobalTermStats = contextMapOrCreate(req, CURRENT_GLOBAL_TERM_STATS);
    currentGlobalTermStats.put(e.getKey(), e.getValue());
  }

  /**
   * A {@link StatsSource} that answers from the supplied global statistics maps and
   * falls back to the local searcher's statistics for entries the maps do not contain.
   */
  protected static class ExactStatsSource extends StatsSource {
    private final Map<String,TermStats> termStatsCache;
    private final Map<String,CollectionStats> colStatsCache;

    /**
     * @param termStatsCache global term statistics, keyed by {@link Term#toString()}
     * @param colStatsCache  global collection statistics, keyed by field name
     */
    public ExactStatsSource(Map<String,TermStats> termStatsCache,
                            Map<String,CollectionStats> colStatsCache) {
      this.termStatsCache = termStatsCache;
      this.colStatsCache = colStatsCache;
    }

    /**
     * @return the global statistics for {@code term} if known, otherwise the local
     *         statistics obtained from {@code localSearcher}
     */
    @Override
    public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq)
        throws IOException {
      TermStats termStats = termStatsCache.get(term.toString());
      // termStats == null is also the case when the term has no docFreq on any shard:
      // sendGlobalStats skips per-shard entries with docFreq == 0, so such terms never
      // make it into the global map. Not sure we need a warning here.
      if (termStats == null) {
        log.debug("Missing global termStats info for term={}, using local stats", term);
        return localSearcher.localTermStatistics(term, docFreq, totalTermFreq);
      }
      return termStats.toTermStatistics();
    }

    /**
     * @return the global statistics for {@code field} if known, otherwise the local
     *         statistics obtained from {@code localSearcher}
     */
    @Override
    public CollectionStatistics collectionStatistics(SolrIndexSearcher localSearcher, String field)
        throws IOException {
      CollectionStats colStats = colStatsCache.get(field);
      if (colStats == null) {
        log.debug("Missing global colStats info for field={}, using local", field);
        return localSearcher.localCollectionStatistics(field);
      }
      return colStats.toCollectionStatistics();
    }
  }
}