| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.search; |
| |
| import org.apache.lucene.search.FieldDoc; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.SortField; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefBuilder; |
| import org.apache.solr.common.SolrException; |
| import org.apache.solr.common.SolrException.ErrorCode; |
| |
| import static org.apache.solr.common.params.CursorMarkParams.*; |
| |
| import org.apache.solr.common.util.Base64; |
| import org.apache.solr.common.util.JavaBinCodec; |
| import org.apache.solr.schema.IndexSchema; |
| import org.apache.solr.schema.FieldType; |
| import org.apache.solr.schema.SchemaField; |
| |
| import java.util.List; |
| import java.util.ArrayList; |
| import java.io.ByteArrayOutputStream; |
| import java.io.ByteArrayInputStream; |
| |
| /** |
| * An object that encapsulates the basic information about the current Mark Point of a |
| * "Cursor" based request. <code>CursorMark</code> objects track the sort values of |
| * the last document returned to a user, so that {@link SolrIndexSearcher} can then |
| * be asked to find all documents "after" the values represented by this |
| * <code>CursorMark</code>. |
| * |
| */ |
| public final class CursorMark { |
| |
| /** |
| * Used for validation and (un)marshalling of sort values |
| */ |
| private final SortSpec sortSpec; |
| |
| /** |
| * The raw, unmarshalled, sort values (that corrispond with the SortField's in the |
| * SortSpec) for knowing which docs this cursor should "search after". If this |
| * list is null, then we have no specific values to "search after" and we |
| * should start from the very beginning of the sorted list of documents matching |
| * the query. |
| */ |
| private List<Object> values = null; |
| |
| /** |
| * Generates an empty CursorMark bound for use with the |
| * specified schema and {@link SortSpec}. |
| * |
| * @param schema used for basic validation |
| * @param sortSpec bound to this totem (un)marshalling serialized values |
| */ |
| public CursorMark(IndexSchema schema, SortSpec sortSpec) { |
| |
| final SchemaField uniqueKey = schema.getUniqueKeyField(); |
| if (null == uniqueKey) { |
| throw new SolrException(ErrorCode.BAD_REQUEST, |
| "Cursor functionality is not available unless the IndexSchema defines a uniqueKey field"); |
| } |
| |
| final Sort sort = sortSpec.getSort(); |
| if (null == sort) { |
| // pure score, by definition we don't include the mandatyr uniqueKey tie breaker |
| throw new SolrException(ErrorCode.BAD_REQUEST, |
| "Cursor functionality requires a sort containing a uniqueKey field tie breaker"); |
| } |
| |
| if (!sortSpec.getSchemaFields().contains(uniqueKey)) { |
| throw new SolrException(ErrorCode.BAD_REQUEST, |
| "Cursor functionality requires a sort containing a uniqueKey field tie breaker"); |
| } |
| |
| if (0 != sortSpec.getOffset()) { |
| throw new SolrException(ErrorCode.BAD_REQUEST, |
| "Cursor functionality requires start=0"); |
| } |
| |
| for (SortField sf : sort.getSort()) { |
| if (sf.getType().equals(SortField.Type.DOC)) { |
| throw new SolrException(ErrorCode.BAD_REQUEST, |
| "Cursor functionality can not be used with internal doc ordering sort: _docid_"); |
| } |
| } |
| |
| if (sort.getSort().length != sortSpec.getSchemaFields().size()) { |
| throw new SolrException(ErrorCode.SERVER_ERROR, |
| "Cursor SortSpec failure: sort length != SchemaFields: " |
| + sort.getSort().length + " != " + |
| sortSpec.getSchemaFields().size()); |
| } |
| |
| this.sortSpec = sortSpec; |
| this.values = null; |
| } |
| |
| /** |
| * Generates an empty CursorMark bound for use with the same {@link SortSpec} |
| * as the specified existing CursorMark. |
| * |
| * @param previous Existing CursorMark whose SortSpec will be reused in the new CursorMark. |
| * @see #createNext |
| */ |
| private CursorMark(CursorMark previous) { |
| this.sortSpec = previous.sortSpec; |
| this.values = null; |
| } |
| |
| /** |
| * Generates an new CursorMark bound for use with the same {@link SortSpec} |
| * as the current CursorMark but using the new SortValues. |
| * |
| */ |
| public CursorMark createNext(List<Object> nextSortValues) { |
| final CursorMark next = new CursorMark(this); |
| next.setSortValues(nextSortValues); |
| return next; |
| } |
| |
| |
| /** |
| * Sets the (raw, unmarshalled) sort values (which must conform to the existing |
| * sortSpec) to populate this object. If null, then there is nothing to |
| * "search after" and the "first page" of results should be returned. |
| */ |
| public void setSortValues(List<Object> input) { |
| if (null == input) { |
| this.values = null; |
| } else { |
| if (input.size() != sortSpec.getSort().getSort().length) { |
| throw new SolrException(ErrorCode.SERVER_ERROR, |
| "Cursor SortSpec failure: sort values != sort length: " |
| + input.size() + " != " + sortSpec.getSort().getSort().length); |
| } |
| |
| // defensive copy |
| this.values = new ArrayList<>(input); |
| } |
| } |
| |
| /** |
| * Returns a copy of the (raw, unmarshalled) sort values used by this object, or |
| * null if first page of docs should be returned (ie: no sort after) |
| */ |
| public List<Object> getSortValues() { |
| // defensive copy |
| return null == this.values ? null : new ArrayList<>(this.values); |
| } |
| |
| /** |
| * Returns the SortSpec used by this object. |
| */ |
| public SortSpec getSortSpec() { |
| return this.sortSpec; |
| } |
| |
| /** |
| * Parses the serialized version of a CursorMark from a client |
| * (which must conform to the existing sortSpec) and populates this object. |
| * |
| * @see #getSerializedTotem |
| */ |
| @SuppressWarnings({"unchecked"}) |
| public void parseSerializedTotem(final String serialized) { |
| if (CURSOR_MARK_START.equals(serialized)) { |
| values = null; |
| return; |
| } |
| final SortField[] sortFields = sortSpec.getSort().getSort(); |
| final List<SchemaField> schemaFields = sortSpec.getSchemaFields(); |
| |
| List<Object> pieces = null; |
| try { |
| final byte[] rawData = Base64.base64ToByteArray(serialized); |
| try (JavaBinCodec jbc = new JavaBinCodec(); ByteArrayInputStream in = new ByteArrayInputStream(rawData)){ |
| pieces = (List<Object>) jbc.unmarshal(in); |
| boolean b = false; |
| for (Object o : pieces) { |
| if (o instanceof BytesRefBuilder || o instanceof BytesRef || o instanceof String) { |
| b = true; break; |
| } |
| } |
| if (b) { |
| in.reset(); |
| pieces = (List<Object>) new JavaBinCodec().unmarshal(in); |
| } |
| } |
| } catch (Exception ex) { |
| throw new SolrException(ErrorCode.BAD_REQUEST, |
| "Unable to parse '"+CURSOR_MARK_PARAM+"' after totem: " + |
| "value must either be '"+CURSOR_MARK_START+"' or the " + |
| "'"+CURSOR_MARK_NEXT+"' returned by a previous search: " |
| + serialized, ex); |
| } |
| assert null != pieces : "pieces wasn't parsed, nor exception thrown?"; |
| |
| if (sortFields.length != pieces.size()) { |
| throw new SolrException(ErrorCode.BAD_REQUEST, |
| CURSOR_MARK_PARAM+" does not work with current sort (wrong size): " + serialized); |
| } |
| |
| |
| this.values = new ArrayList<>(sortFields.length); |
| |
| final BytesRef tmpBytes = new BytesRef(); |
| for (int i = 0; i < sortFields.length; i++) { |
| |
| SortField curSort = sortFields[i]; |
| SchemaField curField = schemaFields.get(i); |
| Object rawValue = pieces.get(i); |
| |
| if (null != curField) { |
| FieldType curType = curField.getType(); |
| rawValue = curType.unmarshalSortValue(rawValue); |
| } |
| |
| this.values.add(rawValue); |
| } |
| } |
| |
| /** |
| * Generates a Base64 encoded serialized representation of the sort values |
| * encapsulated by this object, for use in cursor requests. |
| * |
| * @see #parseSerializedTotem |
| */ |
| public String getSerializedTotem() { |
| if (null == this.values) { |
| return CURSOR_MARK_START; |
| } |
| |
| final List<SchemaField> schemaFields = sortSpec.getSchemaFields(); |
| final ArrayList<Object> marshalledValues = new ArrayList<>(values.size()+1); |
| for (int i = 0; i < schemaFields.size(); i++) { |
| SchemaField fld = schemaFields.get(i); |
| Object safeValue = values.get(i); |
| if (null != fld) { |
| FieldType type = fld.getType(); |
| safeValue = type.marshalSortValue(safeValue); |
| } |
| marshalledValues.add(safeValue); |
| } |
| |
| // TODO: we could also encode info about the SortSpec for error checking: |
| // the type/name/dir from the SortFields (or a hashCode to act as a checksum) |
| // could help provide more validation beyond just the number of clauses. |
| |
| try (JavaBinCodec jbc = new JavaBinCodec(); ByteArrayOutputStream out = new ByteArrayOutputStream(256)) { |
| jbc.marshal(marshalledValues, out); |
| byte[] rawData = out.toByteArray(); |
| return Base64.byteArrayToBase64(rawData, 0, rawData.length); |
| } catch (Exception ex) { |
| throw new SolrException(ErrorCode.SERVER_ERROR, |
| "Unable to format search after totem", ex); |
| } |
| } |
| |
| /** |
| * Returns a synthetically constructed {@link FieldDoc} whose {@link FieldDoc#fields} |
| * match the values of this object. |
| * <p> |
| * Important Notes: |
| * </p> |
| * <ul> |
| * <li>{@link FieldDoc#doc} will always be set to {@link Integer#MAX_VALUE} so |
| * that the tie breaking logic used by <code>IndexSearcher</code> won't select |
| * the same doc again based on the internal lucene docId when the Solr |
| * <code>uniqueKey</code> value is the same. |
| * </li> |
| * <li>{@link FieldDoc#score} will always be set to 0.0F since it is not used |
| * when applying <code>searchAfter</code> logic. (Even if the sort values themselves |
| * contain scores which are used in the sort) |
| * </li> |
| * </ul> |
| * |
| * @return a {@link FieldDoc} to "search after" or null if the initial |
| * page of results is requested. |
| */ |
| public FieldDoc getSearchAfterFieldDoc() { |
| if (null == values) return null; |
| |
| return new FieldDoc(Integer.MAX_VALUE, 0.0F, values.toArray()); |
| } |
| |
| } |