blob: eb33b40b74544785292e951636d0f0cb9c68ad44 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import static org.apache.solr.common.params.CursorMarkParams.*;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import java.util.List;
import java.util.ArrayList;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
/**
* An object that encapsulates the basic information about the current Mark Point of a
* "Cursor" based request. <code>CursorMark</code> objects track the sort values of
* the last document returned to a user, so that {@link SolrIndexSearcher} can then
* be asked to find all documents "after" the values represented by this
* <code>CursorMark</code>.
*
*/
public final class CursorMark {
/**
* Used for validation and (un)marshalling of sort values
*/
private final SortSpec sortSpec;
/**
* The raw, unmarshalled, sort values (that corrispond with the SortField's in the
* SortSpec) for knowing which docs this cursor should "search after". If this
* list is null, then we have no specific values to "search after" and we
* should start from the very beginning of the sorted list of documents matching
* the query.
*/
private List<Object> values = null;
/**
* Generates an empty CursorMark bound for use with the
* specified schema and {@link SortSpec}.
*
* @param schema used for basic validation
* @param sortSpec bound to this totem (un)marshalling serialized values
*/
public CursorMark(IndexSchema schema, SortSpec sortSpec) {
final SchemaField uniqueKey = schema.getUniqueKeyField();
if (null == uniqueKey) {
throw new SolrException(ErrorCode.BAD_REQUEST,
"Cursor functionality is not available unless the IndexSchema defines a uniqueKey field");
}
final Sort sort = sortSpec.getSort();
if (null == sort) {
// pure score, by definition we don't include the mandatyr uniqueKey tie breaker
throw new SolrException(ErrorCode.BAD_REQUEST,
"Cursor functionality requires a sort containing a uniqueKey field tie breaker");
}
if (!sortSpec.getSchemaFields().contains(uniqueKey)) {
throw new SolrException(ErrorCode.BAD_REQUEST,
"Cursor functionality requires a sort containing a uniqueKey field tie breaker");
}
if (0 != sortSpec.getOffset()) {
throw new SolrException(ErrorCode.BAD_REQUEST,
"Cursor functionality requires start=0");
}
for (SortField sf : sort.getSort()) {
if (sf.getType().equals(SortField.Type.DOC)) {
throw new SolrException(ErrorCode.BAD_REQUEST,
"Cursor functionality can not be used with internal doc ordering sort: _docid_");
}
}
if (sort.getSort().length != sortSpec.getSchemaFields().size()) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"Cursor SortSpec failure: sort length != SchemaFields: "
+ sort.getSort().length + " != " +
sortSpec.getSchemaFields().size());
}
this.sortSpec = sortSpec;
this.values = null;
}
/**
* Generates an empty CursorMark bound for use with the same {@link SortSpec}
* as the specified existing CursorMark.
*
* @param previous Existing CursorMark whose SortSpec will be reused in the new CursorMark.
* @see #createNext
*/
private CursorMark(CursorMark previous) {
this.sortSpec = previous.sortSpec;
this.values = null;
}
/**
* Generates an new CursorMark bound for use with the same {@link SortSpec}
* as the current CursorMark but using the new SortValues.
*
*/
public CursorMark createNext(List<Object> nextSortValues) {
final CursorMark next = new CursorMark(this);
next.setSortValues(nextSortValues);
return next;
}
/**
* Sets the (raw, unmarshalled) sort values (which must conform to the existing
* sortSpec) to populate this object. If null, then there is nothing to
* "search after" and the "first page" of results should be returned.
*/
public void setSortValues(List<Object> input) {
if (null == input) {
this.values = null;
} else {
if (input.size() != sortSpec.getSort().getSort().length) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"Cursor SortSpec failure: sort values != sort length: "
+ input.size() + " != " + sortSpec.getSort().getSort().length);
}
// defensive copy
this.values = new ArrayList<>(input);
}
}
/**
* Returns a copy of the (raw, unmarshalled) sort values used by this object, or
* null if first page of docs should be returned (ie: no sort after)
*/
public List<Object> getSortValues() {
// defensive copy
return null == this.values ? null : new ArrayList<>(this.values);
}
/**
* Returns the SortSpec used by this object.
*/
public SortSpec getSortSpec() {
return this.sortSpec;
}
/**
* Parses the serialized version of a CursorMark from a client
* (which must conform to the existing sortSpec) and populates this object.
*
* @see #getSerializedTotem
*/
@SuppressWarnings({"unchecked"})
public void parseSerializedTotem(final String serialized) {
if (CURSOR_MARK_START.equals(serialized)) {
values = null;
return;
}
final SortField[] sortFields = sortSpec.getSort().getSort();
final List<SchemaField> schemaFields = sortSpec.getSchemaFields();
List<Object> pieces = null;
try {
final byte[] rawData = Base64.base64ToByteArray(serialized);
try (JavaBinCodec jbc = new JavaBinCodec(); ByteArrayInputStream in = new ByteArrayInputStream(rawData)){
pieces = (List<Object>) jbc.unmarshal(in);
boolean b = false;
for (Object o : pieces) {
if (o instanceof BytesRefBuilder || o instanceof BytesRef || o instanceof String) {
b = true; break;
}
}
if (b) {
in.reset();
pieces = (List<Object>) new JavaBinCodec().unmarshal(in);
}
}
} catch (Exception ex) {
throw new SolrException(ErrorCode.BAD_REQUEST,
"Unable to parse '"+CURSOR_MARK_PARAM+"' after totem: " +
"value must either be '"+CURSOR_MARK_START+"' or the " +
"'"+CURSOR_MARK_NEXT+"' returned by a previous search: "
+ serialized, ex);
}
assert null != pieces : "pieces wasn't parsed, nor exception thrown?";
if (sortFields.length != pieces.size()) {
throw new SolrException(ErrorCode.BAD_REQUEST,
CURSOR_MARK_PARAM+" does not work with current sort (wrong size): " + serialized);
}
this.values = new ArrayList<>(sortFields.length);
final BytesRef tmpBytes = new BytesRef();
for (int i = 0; i < sortFields.length; i++) {
SortField curSort = sortFields[i];
SchemaField curField = schemaFields.get(i);
Object rawValue = pieces.get(i);
if (null != curField) {
FieldType curType = curField.getType();
rawValue = curType.unmarshalSortValue(rawValue);
}
this.values.add(rawValue);
}
}
/**
* Generates a Base64 encoded serialized representation of the sort values
* encapsulated by this object, for use in cursor requests.
*
* @see #parseSerializedTotem
*/
public String getSerializedTotem() {
if (null == this.values) {
return CURSOR_MARK_START;
}
final List<SchemaField> schemaFields = sortSpec.getSchemaFields();
final ArrayList<Object> marshalledValues = new ArrayList<>(values.size()+1);
for (int i = 0; i < schemaFields.size(); i++) {
SchemaField fld = schemaFields.get(i);
Object safeValue = values.get(i);
if (null != fld) {
FieldType type = fld.getType();
safeValue = type.marshalSortValue(safeValue);
}
marshalledValues.add(safeValue);
}
// TODO: we could also encode info about the SortSpec for error checking:
// the type/name/dir from the SortFields (or a hashCode to act as a checksum)
// could help provide more validation beyond just the number of clauses.
try (JavaBinCodec jbc = new JavaBinCodec(); ByteArrayOutputStream out = new ByteArrayOutputStream(256)) {
jbc.marshal(marshalledValues, out);
byte[] rawData = out.toByteArray();
return Base64.byteArrayToBase64(rawData, 0, rawData.length);
} catch (Exception ex) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"Unable to format search after totem", ex);
}
}
/**
* Returns a synthetically constructed {@link FieldDoc} whose {@link FieldDoc#fields}
* match the values of this object.
* <p>
* Important Notes:
* </p>
* <ul>
* <li>{@link FieldDoc#doc} will always be set to {@link Integer#MAX_VALUE} so
* that the tie breaking logic used by <code>IndexSearcher</code> won't select
* the same doc again based on the internal lucene docId when the Solr
* <code>uniqueKey</code> value is the same.
* </li>
* <li>{@link FieldDoc#score} will always be set to 0.0F since it is not used
* when applying <code>searchAfter</code> logic. (Even if the sort values themselves
* contain scores which are used in the sort)
* </li>
* </ul>
*
* @return a {@link FieldDoc} to "search after" or null if the initial
* page of results is requested.
*/
public FieldDoc getSearchAfterFieldDoc() {
if (null == values) return null;
return new FieldDoc(Integer.MAX_VALUE, 0.0F, values.toArray());
}
}