/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.*;
import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.*;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.*;
import org.apache.solr.search.grouping.CommandHandler;
import org.apache.solr.search.grouping.GroupingSpecification;
import org.apache.solr.search.grouping.distributed.ShardRequestFactory;
import org.apache.solr.search.grouping.distributed.ShardResponseProcessor;
import org.apache.solr.search.grouping.distributed.command.QueryCommand;
import org.apache.solr.search.grouping.distributed.command.SearchGroupsFieldCommand;
import org.apache.solr.search.grouping.distributed.command.TopGroupsFieldCommand;
import org.apache.solr.search.grouping.distributed.requestfactory.SearchGroupsRequestFactory;
import org.apache.solr.search.grouping.distributed.requestfactory.StoredFieldsShardRequestFactory;
import org.apache.solr.search.grouping.distributed.requestfactory.TopGroupsShardRequestFactory;
import org.apache.solr.search.grouping.distributed.responseprocessor.SearchGroupShardResponseProcessor;
import org.apache.solr.search.grouping.distributed.responseprocessor.StoredFieldsShardResponseProcessor;
import org.apache.solr.search.grouping.distributed.responseprocessor.TopGroupsShardResponseProcessor;
import org.apache.solr.search.grouping.distributed.shardresultserializer.SearchGroupsResultTransformer;
import org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.EndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.GroupedEndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.MainEndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.SimpleEndResultTransformer;
import org.apache.solr.util.SolrPluginUtils;
import java.io.IOException;
import java.net.URL;
import java.util.*;
/**
 * Handles the "query" part of a search request: {@link #prepare(ResponseBuilder)}
 * parses the query, filters and sort, while {@link #process(ResponseBuilder)}
 * executes the search, including grouping and the distributed phases.
 *
 * @version $Id$
 * @since solr 1.3
 */
public class QueryComponent extends SearchComponent
{
public static final String COMPONENT_NAME = "query";
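/**
 * Prepares the request: parses the main query and any filter queries, sets the
 * sort spec on the {@link ResponseBuilder}, records the shards, shards.rows and
 * shards.start parameters, and builds the {@link GroupingSpecification} when
 * grouping is requested via the group.* parameters.
 */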
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
SolrQueryResponse rsp = rb.rsp;
// Set field flags
String fl = params.get(CommonParams.FL);
int fieldFlags = 0;
if (fl != null) {
fieldFlags |= SolrPluginUtils.setReturnFields(fl, rsp);
}
rb.setFieldFlags( fieldFlags );
String defType = params.get(QueryParsing.DEFTYPE,QParserPlugin.DEFAULT_QTYPE);
// get it from the response builder to give a different component a chance
// to set it.
String queryString = rb.getQueryString();
if (queryString == null) {
// this is the normal way it's set.
queryString = params.get( CommonParams.Q );
rb.setQueryString(queryString);
}
try {
QParser parser = QParser.getParser(rb.getQueryString(), defType, req);
Query q = parser.getQuery();
if (q == null) {
// normalize a null query to a query that matches nothing
q = new BooleanQuery();
}
rb.setQuery( q );
rb.setSortSpec( parser.getSort(true) );
rb.setQparser(parser);
String[] fqs = req.getParams().getParams(CommonParams.FQ);
if (fqs!=null && fqs.length!=0) {
List<Query> filters = rb.getFilters();
if (filters==null) {
filters = new ArrayList<Query>(fqs.length);
}
for (String fq : fqs) {
if (fq != null && fq.trim().length()!=0) {
QParser fqp = QParser.getParser(fq, null, req);
filters.add(fqp.getQuery());
}
}
// only set the filters if the list is non-empty; otherwise a request like
// fq=&someotherParam= would add a match-all-docs filter on every request
// when the filter cache is disabled
if (!filters.isEmpty()) {
rb.setFilters( filters );
}
}
} catch (ParseException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
// TODO: temporary... this should go in a different component.
String shards = params.get(ShardParams.SHARDS);
if (shards != null) {
List<String> lst = StrUtils.splitSmart(shards, ",", true);
rb.shards = lst.toArray(new String[lst.size()]);
}
String shards_rows = params.get(ShardParams.SHARDS_ROWS);
if(shards_rows != null) {
rb.shards_rows = Integer.parseInt(shards_rows);
}
String shards_start = params.get(ShardParams.SHARDS_START);
if(shards_start != null) {
rb.shards_start = Integer.parseInt(shards_start);
}
boolean grouping = params.getBool(GroupParams.GROUP, false);
if (!grouping) {
return;
}
SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
SolrIndexSearcher searcher = rb.req.getSearcher();
GroupingSpecification groupingSpec = new GroupingSpecification();
rb.setGroupingSpec(groupingSpec);
//TODO: move weighting of sort
Sort groupSort = searcher.weightSort(cmd.getSort());
if (groupSort == null) {
groupSort = Sort.RELEVANCE;
}
// groupSort defaults to sort
String groupSortStr = params.get(GroupParams.GROUP_SORT);
//TODO: move weighting of sort
Sort sortWithinGroup = groupSortStr == null ? groupSort : searcher.weightSort(QueryParsing.parseSort(groupSortStr, req));
if (sortWithinGroup == null) {
sortWithinGroup = Sort.RELEVANCE;
}
groupingSpec.setSortWithinGroup(sortWithinGroup);
groupingSpec.setGroupSort(groupSort);
String formatStr = params.get(GroupParams.GROUP_FORMAT, Grouping.Format.grouped.name());
Grouping.Format responseFormat;
try {
responseFormat = Grouping.Format.valueOf(formatStr);
} catch (IllegalArgumentException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format("Illegal %s parameter", GroupParams.GROUP_FORMAT));
}
groupingSpec.setResponseFormat(responseFormat);
groupingSpec.setFields(params.getParams(GroupParams.GROUP_FIELD));
groupingSpec.setQueries(params.getParams(GroupParams.GROUP_QUERY));
groupingSpec.setGroupOffset(params.getInt(GroupParams.GROUP_OFFSET, 0));
groupingSpec.setGroupLimit(params.getInt(GroupParams.GROUP_LIMIT, 1));
groupingSpec.setOffset(rb.getSortSpec().getOffset());
groupingSpec.setLimit(rb.getSortSpec().getCount());
groupingSpec.setIncludeGroupCount(params.getBool(GroupParams.GROUP_TOTAL_COUNT, false));
groupingSpec.setMain(params.getBool(GroupParams.GROUP_MAIN, false));
groupingSpec.setNeedScore((cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0);
groupingSpec.setTruncateGroups(params.getBool(GroupParams.GROUP_TRUNCATE, false));
}
/**
 * Actually runs the query: handles the shard "ids" retrieval path, grouping
 * (including the distributed grouping phases), and the regular search.
 */
@Override
public void process(ResponseBuilder rb) throws IOException
{
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
SolrParams params = req.getParams();
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
SolrIndexSearcher searcher = req.getSearcher();
if (rb.getQueryCommand().getOffset() < 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'start' parameter cannot be negative");
}
// -1 as flag if not set.
long timeAllowed = (long)params.getInt( CommonParams.TIME_ALLOWED, -1 );
// Optional: This could also be implemented by the top-level searcher sending
// a filter that lists the ids... that would be transparent to
// the request handler, but would be more expensive (and would preserve score
// too if desired).
String ids = params.get(ShardParams.IDS);
if (ids != null) {
SchemaField idField = req.getSchema().getUniqueKeyField();
List<String> idArr = StrUtils.splitSmart(ids, ",", true);
int[] luceneIds = new int[idArr.size()];
int docs = 0;
for (int i=0; i<idArr.size(); i++) {
int id = req.getSearcher().getFirstMatch(
new Term(idField.getName(), idField.getType().toInternal(idArr.get(i))));
if (id >= 0)
luceneIds[docs++] = id;
}
DocListAndSet res = new DocListAndSet();
res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
if (rb.isNeedDocSet()) {
// TODO: create a cache for this!
List<Query> queries = new ArrayList<Query>();
queries.add(rb.getQuery());
List<Query> filters = rb.getFilters();
if (filters != null) queries.addAll(filters);
res.docSet = searcher.getDocSet(queries);
}
rb.setResults(res);
rsp.add("response",rb.getResults().docList);
return;
}
SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
cmd.setTimeAllowed(timeAllowed);
SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();
//
// grouping / field collapsing
//
GroupingSpecification groupingSpec = rb.getGroupingSpec();
if (groupingSpec != null) {
try {
boolean needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
if (params.getBool(GroupParams.GROUP_DISTRIBUTED_FIRST, false)) {
CommandHandler.Builder topsGroupsActionBuilder = new CommandHandler.Builder()
.setQueryCommand(cmd)
.setNeedDocSet(false) // Order matters here
.setIncludeHitCount(true)
.setSearcher(searcher);
for (String field : groupingSpec.getFields()) {
topsGroupsActionBuilder.addCommandField(new SearchGroupsFieldCommand.Builder()
.setField(searcher.getSchema().getField(field))
.setGroupSort(groupingSpec.getGroupSort())
.setTopNGroups(cmd.getOffset() + cmd.getLen())
.build()
);
}
CommandHandler commandHandler = topsGroupsActionBuilder.build();
commandHandler.execute();
SearchGroupsResultTransformer serializer = new SearchGroupsResultTransformer(searcher);
rsp.add("firstPhase", commandHandler.processResult(result, serializer));
rsp.add("totalHitCount", commandHandler.getTotalHitCount());
rb.setResult(result);
return;
} else if (params.getBool(GroupParams.GROUP_DISTRIBUTED_SECOND, false)) {
CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder()
.setQueryCommand(cmd)
.setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
.setSearcher(searcher);
for (String field : groupingSpec.getFields()) {
String[] topGroupsParam = params.getParams(GroupParams.GROUP_DISTRIBUTED_TOPGROUPS_PREFIX + field);
if (topGroupsParam == null) {
topGroupsParam = new String[0];
}
List<SearchGroup<String>> topGroups = new ArrayList<SearchGroup<String>>(topGroupsParam.length);
for (String topGroup : topGroupsParam) {
SearchGroup<String> searchGroup = new SearchGroup<String>();
if (!topGroup.equals(TopGroupsShardRequestFactory.GROUP_NULL_VALUE)) {
searchGroup.groupValue = searcher.getSchema().getField(field).getType().readableToIndexed(topGroup);
}
topGroups.add(searchGroup);
}
secondPhaseBuilder.addCommandField(
new TopGroupsFieldCommand.Builder()
.setField(searcher.getSchema().getField(field))
.setGroupSort(groupingSpec.getGroupSort())
.setSortWithinGroup(groupingSpec.getSortWithinGroup())
.setFirstPhaseGroups(topGroups)
.setMaxDocPerGroup(groupingSpec.getGroupOffset() + groupingSpec.getGroupLimit())
.setNeedScores(needScores)
.setNeedMaxScore(needScores)
.setNeedGroupCount(groupingSpec.isIncludeGroupCount())
.build()
);
}
for (String query : groupingSpec.getQueries()) {
secondPhaseBuilder.addCommandField(new QueryCommand.Builder()
.setDocsToCollect(groupingSpec.getOffset() + groupingSpec.getLimit())
.setSort(groupingSpec.getGroupSort())
.setQuery(query, rb.req)
.setDocSet(searcher)
.build()
);
}
CommandHandler commandHandler = secondPhaseBuilder.build();
commandHandler.execute();
TopGroupsResultTransformer serializer = new TopGroupsResultTransformer(rb);
rsp.add("secondPhase", commandHandler.processResult(result, serializer));
rb.setResult(result);
return;
}
int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ?
Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
int limitDefault = cmd.getLen(); // this is normally from "rows"
Grouping grouping =
new Grouping(searcher, result, cmd, cacheSecondPassSearch, maxDocsPercentageToCache, groupingSpec.isMain());
grouping.setSort(groupingSpec.getGroupSort())
.setGroupSort(groupingSpec.getSortWithinGroup())
.setDefaultFormat(groupingSpec.getResponseFormat())
.setLimitDefault(limitDefault)
.setDefaultTotalCount(defaultTotalCount)
.setDocsPerGroupDefault(groupingSpec.getGroupLimit())
.setGroupOffsetDefault(groupingSpec.getGroupOffset())
.setGetGroupedDocSet(groupingSpec.isTruncateGroups());
if (groupingSpec.getFields() != null) {
for (String field : groupingSpec.getFields()) {
grouping.addFieldCommand(field, rb.req);
}
}
if (groupingSpec.getQueries() != null) {
for (String groupByStr : groupingSpec.getQueries()) {
grouping.addQueryCommand(groupByStr, rb.req);
}
}
if (rb.doHighlights || rb.isDebug() || params.getBool(MoreLikeThisParams.MLT, false)) {
// we need a single list of the returned docs
cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
}
grouping.execute();
if (grouping.isSignalCacheWarning()) {
rsp.add(
"cacheWarning",
String.format("Cache limit of %d percent relative to maxdoc has exceeded. Please increase cache size or disable caching.", maxDocsPercentageToCache)
);
}
rb.setResult(result);
if (grouping.mainResult != null) {
rsp.add("response", grouping.mainResult);
rsp.getToLog().add("hits", grouping.mainResult.matches());
} else if (!grouping.getCommands().isEmpty()) { // Can never be empty since grouping.execute() checks for this.
rsp.add("grouped", result.groupedResults);
rsp.getToLog().add("hits", grouping.getCommands().get(0).getMatches());
}
return;
} catch (ParseException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
}
searcher.search(result,cmd);
rb.setResult( result );
rsp.add("response",rb.getResults().docList);
rsp.getToLog().add("hits", rb.getResults().docList.matches());
doFieldSortValues(rb, searcher);
doPrefetch(rb);
}
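/**
 * If fsv=true was requested, re-derives the sort field values for each returned
 * document (using a per-segment {@link FieldComparator}) and adds them to the
 * response under "sort_values". Distributed search uses these values to merge
 * results from different shards.
 */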
protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException
{
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
// The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
// currently have an option to return sort field values. Because of this, we
// take the documents given and re-derive the sort values.
boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES,false);
if(fsv){
Sort sort = searcher.weightSort(rb.getSortSpec().getSort());
SortField[] sortFields = sort==null ? new SortField[]{SortField.FIELD_SCORE} : sort.getSort();
NamedList<List> sortVals = new NamedList<List>(); // order is important for the sort fields
Field field = new Field("dummy", "", Field.Store.YES, Field.Index.NO); // a dummy Field
SolrIndexReader reader = searcher.getReader();
SolrIndexReader[] readers = reader.getLeafReaders();
SolrIndexReader subReader = reader;
if (readers.length==1) {
// if there is a single segment, use that subReader and avoid looking up each time
subReader = readers[0];
readers=null;
}
int[] offsets = reader.getLeafOffsets();
for (SortField sortField: sortFields) {
int type = sortField.getType();
if (type==SortField.SCORE || type==SortField.DOC) continue;
FieldComparator comparator = null;
FieldComparator comparators[] = (readers==null) ? null : new FieldComparator[readers.length];
String fieldname = sortField.getField();
FieldType ft = fieldname==null ? null : req.getSchema().getFieldTypeNoEx(fieldname);
DocList docList = rb.getResults().docList;
ArrayList<Object> vals = new ArrayList<Object>(docList.size());
DocIterator it = rb.getResults().docList.iterator();
int offset = 0;
int idx = 0;
while(it.hasNext()) {
int doc = it.nextDoc();
if (readers != null) {
idx = SolrIndexReader.readerIndex(doc, offsets);
subReader = readers[idx];
offset = offsets[idx];
comparator = comparators[idx];
}
if (comparator == null) {
comparator = sortField.getComparator(1,0);
comparator.setNextReader(subReader, offset);
if (comparators != null)
comparators[idx] = comparator;
}
doc -= offset; // adjust for what segment this is in
comparator.copy(0, doc);
Object val = comparator.value(0);
// Sortable float, double, int, and long types all just use a string
// comparator. For these, we need to convert the value into a readable
// format. One reason for this is that XML can't represent all
// string values (or even all unicode code points).
// For other field types, indexedToReadable() should be a no-op
// and thus harmless anyway.
if (val instanceof String) {
field.setValue((String)val);
val = ft.toObject(field);
}
vals.add(val);
}
sortVals.add(fieldname, vals);
}
rsp.add("sort_values", sortVals);
}
}
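/**
 * Pre-fetches the stored documents for the returned doc list when this is not a
 * shard request and at most 50 documents are being returned.
 */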
protected void doPrefetch(ResponseBuilder rb) throws IOException
{
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
//pre-fetch returned documents
if (!req.getParams().getBool(ShardParams.IS_SHARD,false) && rb.getResults().docList != null && rb.getResults().docList.size()<=50) {
// TODO: this may depend on the highlighter component (or other components?)
SolrPluginUtils.optimizePreFetchDocs(rb.getResults().docList, rb.getQuery(), req, rsp);
}
}
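/**
 * Dispatches to the grouped or regular distributed processing logic, depending on
 * whether grouping is enabled for this request.
 */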
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
if (rb.grouping()) {
return groupedDistributedProcess(rb);
} else {
return regularDistributedProcess(rb);
}
}
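/**
 * Drives the distributed grouping stages: after query parsing, the first phase
 * collects the top groups per shard, the second phase collects the top documents
 * within those groups, and the final phase retrieves stored fields. The shard
 * requests for the current stage are created by the matching
 * {@link ShardRequestFactory}.
 */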
private int groupedDistributedProcess(ResponseBuilder rb) {
int nextStage = ResponseBuilder.STAGE_DONE;
ShardRequestFactory shardRequestFactory = null;
if (rb.stage < ResponseBuilder.STAGE_PARSE_QUERY) {
nextStage = ResponseBuilder.STAGE_PARSE_QUERY;
} else if (rb.stage == ResponseBuilder.STAGE_PARSE_QUERY) {
createDistributedIdf(rb);
nextStage = ResponseBuilder.STAGE_TOP_GROUPS;
} else if (rb.stage < ResponseBuilder.STAGE_TOP_GROUPS) {
nextStage = ResponseBuilder.STAGE_TOP_GROUPS;
} else if (rb.stage == ResponseBuilder.STAGE_TOP_GROUPS) {
shardRequestFactory = new SearchGroupsRequestFactory();
nextStage = ResponseBuilder.STAGE_EXECUTE_QUERY;
} else if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) {
nextStage = ResponseBuilder.STAGE_EXECUTE_QUERY;
} else if (rb.stage == ResponseBuilder.STAGE_EXECUTE_QUERY) {
shardRequestFactory = new TopGroupsShardRequestFactory();
nextStage = ResponseBuilder.STAGE_GET_FIELDS;
} else if (rb.stage < ResponseBuilder.STAGE_GET_FIELDS) {
nextStage = ResponseBuilder.STAGE_GET_FIELDS;
} else if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
shardRequestFactory = new StoredFieldsShardRequestFactory();
nextStage = ResponseBuilder.STAGE_DONE;
}
if (shardRequestFactory != null) {
for (ShardRequest shardRequest : shardRequestFactory.constructRequest(rb)) {
rb.addRequest(this, shardRequest);
}
}
return nextStage;
}
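/**
 * Drives the non-grouped distributed stages: after query parsing, the main query
 * is sent to the shards to collect the top ids, and then the stored fields for
 * the merged ids are retrieved.
 */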
private int regularDistributedProcess(ResponseBuilder rb) {
if (rb.stage < ResponseBuilder.STAGE_PARSE_QUERY)
return ResponseBuilder.STAGE_PARSE_QUERY;
if (rb.stage == ResponseBuilder.STAGE_PARSE_QUERY) {
createDistributedIdf(rb);
return ResponseBuilder.STAGE_EXECUTE_QUERY;
}
if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) return ResponseBuilder.STAGE_EXECUTE_QUERY;
if (rb.stage == ResponseBuilder.STAGE_EXECUTE_QUERY) {
createMainQuery(rb);
return ResponseBuilder.STAGE_GET_FIELDS;
}
if (rb.stage < ResponseBuilder.STAGE_GET_FIELDS) return ResponseBuilder.STAGE_GET_FIELDS;
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
createRetrieveDocs(rb);
return ResponseBuilder.STAGE_DONE;
}
return ResponseBuilder.STAGE_DONE;
}
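/**
 * Dispatches shard responses to the grouped or regular response handling logic.
 */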
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
if (rb.grouping()) {
handleGroupedResponses(rb, sreq);
} else {
handleRegularResponses(rb, sreq);
}
}
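/**
 * Selects the {@link ShardResponseProcessor} that matches the purpose of the shard
 * request (top groups, top ids within groups, or stored fields) and lets it merge
 * the shard responses into the {@link ResponseBuilder}.
 */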
private void handleGroupedResponses(ResponseBuilder rb, ShardRequest sreq) {
ShardResponseProcessor responseProcessor = null;
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_GROUPS) != 0) {
responseProcessor = new SearchGroupShardResponseProcessor();
} else if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
responseProcessor = new TopGroupsShardResponseProcessor();
} else if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
responseProcessor = new StoredFieldsShardResponseProcessor();
}
if (responseProcessor != null) {
responseProcessor.process(rb, sreq);
}
}
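/**
 * Merges the document ids from a top-ids request and fills in the stored fields
 * from a get-fields request.
 */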
private void handleRegularResponses(ResponseBuilder rb, ShardRequest sreq) {
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
mergeIds(rb, sreq);
}
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
returnFields(rb, sreq);
}
}
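/**
 * At the end of the stored-fields stage, assembles the final grouped or regular
 * response.
 */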
@Override
public void finishStage(ResponseBuilder rb) {
if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) {
return;
}
if (rb.grouping()) {
groupedFinishStage(rb);
} else {
regularFinishStage(rb);
}
}
private static final EndResultTransformer MAIN_END_RESULT_TRANSFORMER = new MainEndResultTransformer();
private static final EndResultTransformer SIMPLE_END_RESULT_TRANSFORMER = new SimpleEndResultTransformer();
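/**
 * Transforms the merged top groups and query command results into the final
 * response format (grouped, simple, or main), looking up the retrieved documents
 * by their unique key.
 */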
@SuppressWarnings("unchecked")
private void groupedFinishStage(final ResponseBuilder rb) {
// Produce the same response format as a non-distributed request.
GroupingSpecification groupSpec = rb.getGroupingSpec();
if (rb.mergedTopGroups.isEmpty()) {
for (String field : groupSpec.getFields()) {
rb.mergedTopGroups.put(field, new TopGroups(null, null, 0, 0, new GroupDocs[]{}));
}
rb.resultIds = new HashMap<Object, ShardDoc>();
}
EndResultTransformer.SolrDocumentSource solrDocumentSource = new EndResultTransformer.SolrDocumentSource() {
public SolrDocument retrieve(ScoreDoc doc) {
ShardDoc solrDoc = (ShardDoc) doc;
return rb.retrievedDocuments.get(solrDoc.id);
}
};
EndResultTransformer endResultTransformer;
if (groupSpec.isMain()) {
endResultTransformer = MAIN_END_RESULT_TRANSFORMER;
} else if (Grouping.Format.grouped == groupSpec.getResponseFormat()) {
endResultTransformer = new GroupedEndResultTransformer(rb.req.getSearcher());
} else if (Grouping.Format.simple == groupSpec.getResponseFormat() && !groupSpec.isMain()) {
endResultTransformer = SIMPLE_END_RESULT_TRANSFORMER;
} else {
return;
}
Map<String, Object> combinedMap = new LinkedHashMap<String, Object>();
combinedMap.putAll(rb.mergedTopGroups);
combinedMap.putAll(rb.mergedQueryCommandResults);
endResultTransformer.transform(combinedMap, rb, solrDocumentSource);
}
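/**
 * Removes documents that could not be retrieved (e.g. because the index changed
 * between phases) and adds the merged document list to the response.
 */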
private void regularFinishStage(ResponseBuilder rb) {
// We may not have been able to retrieve all the docs due to an
// index change. Remove any null documents.
for (Iterator<SolrDocument> iter = rb._responseDocs.iterator(); iter.hasNext();) {
if (iter.next() == null) {
iter.remove();
rb._responseDocs.setNumFound(rb._responseDocs.getNumFound()-1);
}
}
rb.rsp.add("response", rb._responseDocs);
}
private void createDistributedIdf(ResponseBuilder rb) {
// TODO
}
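/**
 * Creates the first-phase shard request that collects the top ids: the start
 * offset is reset to 0 (or shards.start), rows is set to offset + count (or
 * shards.rows), field sort values are requested, and only the unique key field
 * (plus score, if needed) is returned.
 */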
private void createMainQuery(ResponseBuilder rb) {
ShardRequest sreq = new ShardRequest();
sreq.purpose = ShardRequest.PURPOSE_GET_TOP_IDS;
sreq.params = new ModifiableSolrParams(rb.req.getParams());
// TODO: base on current params or original params?
// don't pass through any shards param
sreq.params.remove(ShardParams.SHARDS);
// set the start (offset) to 0 for each shard request so we can properly merge
// results from the start.
if(rb.shards_start > -1) {
// if the client set shards.start set this explicitly
sreq.params.set(CommonParams.START,rb.shards_start);
} else {
sreq.params.set(CommonParams.START, "0");
}
// TODO: should we even use the SortSpec? That's obtained from the QParser, and
// perhaps we shouldn't attempt to parse the query at this level?
// Alternate Idea: instead of specifying all these things at the upper level,
// we could just specify that this is a shard request.
if(rb.shards_rows > -1) {
// if the client set shards.rows, set this explicitly
sreq.params.set(CommonParams.ROWS,rb.shards_rows);
} else {
sreq.params.set(CommonParams.ROWS, rb.getSortSpec().getOffset() + rb.getSortSpec().getCount());
}
// in this first phase, request only the unique key field
// and any fields needed for merging.
sreq.params.set(ResponseBuilder.FIELD_SORT_VALUES,"true");
if ( (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES)!=0 || rb.getSortSpec().includesScore()) {
sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName() + ",score");
} else {
sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName());
}
rb.addRequest(this, sreq);
}
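/**
 * Merges the per-shard top documents into a single ordered list using a
 * {@link ShardFieldSortedHitQueue}, de-duplicating by unique key, computing the
 * global numFound and maxScore, and recording the id to {@link ShardDoc} mapping
 * used by the stored-fields phase.
 */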
private void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
SortSpec ss = rb.getSortSpec();
Sort sort = ss.getSort();
SortField[] sortFields = null;
if(sort != null) sortFields = sort.getSort();
else {
sortFields = new SortField[]{SortField.FIELD_SCORE};
}
SchemaField uniqueKeyField = rb.req.getSchema().getUniqueKeyField();
// id to shard mapping, to eliminate any accidental dups
HashMap<Object,String> uniqueDoc = new HashMap<Object,String>();
// Merge the docs via a priority queue so we don't have to sort *all* of the
// documents... we only need to order the top (rows+start)
ShardFieldSortedHitQueue queue;
queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount());
long numFound = 0;
Float maxScore=null;
boolean partialResults = false;
for (ShardResponse srsp : sreq.responses) {
SolrDocumentList docs = (SolrDocumentList)srsp.getSolrResponse().getResponse().get("response");
NamedList<?> responseHeader = (NamedList<?>)srsp.getSolrResponse().getResponse().get("responseHeader");
if (responseHeader != null && Boolean.TRUE.equals(responseHeader.get("partialResults"))) {
partialResults = true;
}
// calculate global maxScore and numDocsFound
if (docs.getMaxScore() != null) {
maxScore = maxScore==null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore());
}
numFound += docs.getNumFound();
NamedList sortFieldValues = (NamedList)(srsp.getSolrResponse().getResponse().get("sort_values"));
// go through every doc in this response, construct a ShardDoc, and
// put it in the priority queue so it can be ordered.
for (int i=0; i<docs.size(); i++) {
SolrDocument doc = docs.get(i);
Object id = doc.getFieldValue(uniqueKeyField.getName());
String prevShard = uniqueDoc.put(id, srsp.getShard());
if (prevShard != null) {
// duplicate detected
numFound--;
// For now, just always use the first encountered since we can't currently
// remove the previous one added to the priority queue. If we switched
// to the Java5 PriorityQueue, this would be easier.
continue;
// make which duplicate is used deterministic based on shard
// if (prevShard.compareTo(srsp.shard) >= 0) {
// TODO: remove previous from priority queue
// continue;
// }
}
ShardDoc shardDoc = new ShardDoc();
shardDoc.id = id;
shardDoc.shard = srsp.getShard();
shardDoc.orderInShard = i;
Object scoreObj = doc.getFieldValue("score");
if (scoreObj != null) {
if (scoreObj instanceof String) {
shardDoc.score = Float.parseFloat((String)scoreObj);
} else {
shardDoc.score = (Float)scoreObj;
}
}
shardDoc.sortFieldValues = sortFieldValues;
queue.insertWithOverflow(shardDoc);
} // end for-each-doc-in-response
} // end for-each-response
// The queue now has 0 -> queueSize docs, where queueSize <= start + rows.
// We want to pop the last documents off the queue to get
// the docs in the range offset -> queueSize.
int resultSize = queue.size() - ss.getOffset();
resultSize = Math.max(0, resultSize); // there may not be any docs in range
Map<Object,ShardDoc> resultIds = new HashMap<Object,ShardDoc>();
for (int i=resultSize-1; i>=0; i--) {
ShardDoc shardDoc = queue.pop();
shardDoc.positionInResponse = i;
// Need the toString() for correlation with other lists that must
// be strings (like keys in highlighting, explain, etc)
resultIds.put(shardDoc.id.toString(), shardDoc);
}
SolrDocumentList responseDocs = new SolrDocumentList();
if (maxScore!=null) responseDocs.setMaxScore(maxScore);
responseDocs.setNumFound(numFound);
responseDocs.setStart(ss.getOffset());
// size appropriately
for (int i=0; i<resultSize; i++) responseDocs.add(null);
// save these results in a private area so we can access them
// again when retrieving stored fields.
// TODO: use ResponseBuilder (w/ comments) or the request context?
rb.resultIds = resultIds;
rb._responseDocs = responseDocs;
if (partialResults) {
rb.rsp.getResponseHeader().add( "partialResults", Boolean.TRUE );
}
}
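/**
 * Groups the merged {@link ShardDoc}s by shard and creates one stored-fields
 * request per shard, passing the ids to retrieve and ensuring the unique key
 * field is part of the returned fields.
 */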
private void createRetrieveDocs(ResponseBuilder rb) {
// TODO: in a system with nTiers > 2, we could be passed "ids" here
// unless those requests always go to the final destination shard
// for each shard, collect the documents for that shard.
HashMap<String, Collection<ShardDoc>> shardMap = new HashMap<String,Collection<ShardDoc>>();
for (ShardDoc sdoc : rb.resultIds.values()) {
Collection<ShardDoc> shardDocs = shardMap.get(sdoc.shard);
if (shardDocs == null) {
shardDocs = new ArrayList<ShardDoc>();
shardMap.put(sdoc.shard, shardDocs);
}
shardDocs.add(sdoc);
}
SchemaField uniqueField = rb.req.getSchema().getUniqueKeyField();
// Now create a request for each shard to retrieve the stored fields
for (Collection<ShardDoc> shardDocs : shardMap.values()) {
ShardRequest sreq = new ShardRequest();
sreq.purpose = ShardRequest.PURPOSE_GET_FIELDS;
sreq.shards = new String[] {shardDocs.iterator().next().shard};
sreq.params = new ModifiableSolrParams();
// add original params
sreq.params.add( rb.req.getParams());
// no need for a sort, we already have order
sreq.params.remove(CommonParams.SORT);
// we already have the field sort values
sreq.params.remove(ResponseBuilder.FIELD_SORT_VALUES);
// disable grouping
sreq.params.remove("group");
// make sure that the id is returned for correlation.
String fl = sreq.params.get(CommonParams.FL);
if (fl != null) {
fl = fl.trim();
// currently, "score" is synonymous with "*,score", so don't append the unique
// key if fl is empty, "*", or "score", since that would change the meaning.
if (fl.length()!=0 && !"score".equals(fl) && !"*".equals(fl)) {
sreq.params.set(CommonParams.FL, fl+','+uniqueField.getName());
}
}
ArrayList<String> ids = new ArrayList<String>(shardDocs.size());
for (ShardDoc shardDoc : shardDocs) {
// TODO: depending on the type, we may need more than a simple toString()?
ids.add(shardDoc.id.toString());
}
sreq.params.add(ShardParams.IDS, StrUtils.join(ids, ','));
rb.addRequest(this, sreq);
}
}
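/**
 * Copies the stored fields (and score, if requested) from each shard's
 * get-fields response into the merged document list at the correct positions.
 */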
private void returnFields(ResponseBuilder rb, ShardRequest sreq) {
// Keep in mind that this could also be a shard in a multi-tiered system.
// TODO: if a multi-tiered system, it seems like some requests
// could/should bypass middlemen (like retrieving stored fields)
// TODO: merge fsv to if requested
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
boolean returnScores = (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0;
assert(sreq.responses.size() == 1);
ShardResponse srsp = sreq.responses.get(0);
SolrDocumentList docs = (SolrDocumentList)srsp.getSolrResponse().getResponse().get("response");
String keyFieldName = rb.req.getSchema().getUniqueKeyField().getName();
for (SolrDocument doc : docs) {
Object id = doc.getFieldValue(keyFieldName);
ShardDoc sdoc = rb.resultIds.get(id.toString());
if (sdoc != null) {
if (returnScores && sdoc.score != null) {
doc.setField("score", sdoc.score);
}
rb._responseDocs.set(sdoc.positionInResponse, doc);
}
}
}
}
/////////////////////////////////////////////
/// SolrInfoMBean
////////////////////////////////////////////
@Override
public String getDescription() {
return "query";
}
@Override
public String getVersion() {
return "$Revision$";
}
@Override
public String getSourceId() {
return "$Id$";
}
@Override
public String getSource() {
return "$URL$";
}
@Override
public URL[] getDocs() {
return null;
}
}