solr/core/src/java/org/apache/solr/search/CursorMark.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.solr.search;

 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;

 import static org.apache.solr.common.params.CursorMarkParams.*;

 import org.apache.solr.common.util.Base64;
 import org.apache.solr.common.util.JavaBinCodec;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;

 import java.util.List;
 import java.util.ArrayList;
 import java.io.ByteArrayOutputStream;
 import java.io.ByteArrayInputStream;

 /**
  * An object that encapsulates the basic information about the current Mark Point of a
  * "Cursor" based request.  <code>CursorMark</code> objects track the sort values of
  * the last document returned to a user, so that {@link SolrIndexSearcher} can then
  * be asked to find all documents "after" the values represented by this
  * <code>CursorMark</code>.
  *
  * <p>
  * Instances are mutable: {@link #setSortValues} and {@link #parseSerializedTotem}
  * replace the tracked sort values in place.
  * </p>
  */
 public final class CursorMark {

   /**
    * Used for validation and (un)marshalling of sort values
    */
   private final SortSpec sortSpec;

   /**
    * The raw, unmarshalled, sort values (that correspond with the SortField's in the
    * SortSpec) for knowing which docs this cursor should "search after".  If this
    * list is null, then we have no specific values to "search after" and we
    * should start from the very beginning of the sorted list of documents matching
    * the query.
    */
   private List<Object> values = null;

   /**
    * Generates an empty CursorMark bound for use with the
    * specified schema and {@link SortSpec}.
    *
    * @param schema used for basic validation
    * @param sortSpec bound to this totem (un)marshalling serialized values
    * @throws SolrException with {@link ErrorCode#BAD_REQUEST} if the schema has no
    *         uniqueKey field, the sort is missing or lacks the uniqueKey field, the
    *         offset is not 0, or the sort uses internal doc ordering; with
    *         {@link ErrorCode#SERVER_ERROR} if the number of sort clauses does not
    *         match the number of schema fields in the SortSpec
    */
   public CursorMark(IndexSchema schema, SortSpec sortSpec) {

     final SchemaField uniqueKey = schema.getUniqueKeyField();
     if (null == uniqueKey) {
       throw new SolrException(ErrorCode.BAD_REQUEST,
                               "Cursor functionality is not available unless the IndexSchema defines a uniqueKey field");
     }

     final Sort sort = sortSpec.getSort();
     if (null == sort) {
       // pure score, by definition we don't include the mandatory uniqueKey tie breaker
       throw new SolrException(ErrorCode.BAD_REQUEST,
                               "Cursor functionality requires a sort containing a uniqueKey field tie breaker");
     }

     if (!sortSpec.getSchemaFields().contains(uniqueKey)) {
       throw new SolrException(ErrorCode.BAD_REQUEST,
                               "Cursor functionality requires a sort containing a uniqueKey field tie breaker");
     }

     if (0 != sortSpec.getOffset()) {
       throw new SolrException(ErrorCode.BAD_REQUEST,
                               "Cursor functionality requires start=0");
     }

     for (SortField sf : sort.getSort()) {
       if (sf.getType().equals(SortField.Type.DOC)) {
         throw new SolrException(ErrorCode.BAD_REQUEST,
                                 "Cursor functionality can not be used with internal doc ordering sort: _docid_");
       }
     }

     // the (un)marshalling loops below index both collections in parallel,
     // so they must line up one-to-one
     if (sort.getSort().length != sortSpec.getSchemaFields().size()) {
       throw new SolrException(ErrorCode.SERVER_ERROR,
                               "Cursor SortSpec failure: sort length != SchemaFields: "
                               + sort.getSort().length + " != " +
                               sortSpec.getSchemaFields().size());
     }

     this.sortSpec = sortSpec;
     this.values = null;
   }

   /**
    * Generates an empty CursorMark bound for use with the same {@link SortSpec}
    * as the specified existing CursorMark.
    *
    * @param previous Existing CursorMark whose SortSpec will be reused in the new CursorMark.
    * @see #createNext
    */
   private CursorMark(CursorMark previous) {
     this.sortSpec = previous.sortSpec;
     this.values = null;
   }

   /**
    * Generates a new CursorMark bound for use with the same {@link SortSpec}
    * as the current CursorMark but using the new SortValues.
    *
    * @param nextSortValues the (raw, unmarshalled) sort values for the new cursor, or
    *        null for a cursor with nothing to "search after"
    * @return a new CursorMark; this object is not modified
    * @throws SolrException if the number of values does not match the number of sort clauses
    */
   public CursorMark createNext(List<Object> nextSortValues) {
     final CursorMark next = new CursorMark(this);
     next.setSortValues(nextSortValues);
     return next;
   }


   /**
    * Sets the (raw, unmarshalled) sort values (which must conform to the existing
    * sortSpec) to populate this object.  If null, then there is nothing to
    * "search after" and the "first page" of results should be returned.
    *
    * @param input the new sort values (copied), or null
    * @throws SolrException with {@link ErrorCode#SERVER_ERROR} if the number of values
    *         does not match the number of sort clauses
    */
   public void setSortValues(List<Object> input) {
     if (null == input) {
       this.values = null;
       return;
     }

     final int expected = sortSpec.getSort().getSort().length;
     if (input.size() != expected) {
       throw new SolrException(ErrorCode.SERVER_ERROR,
                               "Cursor SortSpec failure: sort values != sort length: "
                               + input.size() + " != " + expected);
     }

     // defensive copy
     this.values = new ArrayList<>(input);
   }

   /**
    * Returns a copy of the (raw, unmarshalled) sort values used by this object, or
    * null if first page of docs should be returned (ie: no sort after)
    */
   public List<Object> getSortValues() {
     // defensive copy
     return null == this.values ? null : new ArrayList<>(this.values);
   }

   /**
    * Returns the SortSpec used by this object.
    */
   public SortSpec getSortSpec() {
     return this.sortSpec;
   }

   /**
    * Parses the serialized version of a CursorMark from a client
    * (which must conform to the existing sortSpec) and populates this object.
    * <p>
    * The sort values of this object are only replaced once the whole totem has been
    * parsed successfully; if an exception is thrown the previous values are kept.
    * </p>
    *
    * @param serialized either the "start" marker or a totem produced by
    *        {@link #getSerializedTotem}
    * @throws SolrException with {@link ErrorCode#BAD_REQUEST} if the totem can not be
    *         decoded or does not contain one value per sort clause
    * @see #getSerializedTotem
    */
   public void parseSerializedTotem(final String serialized) {
     if (CURSOR_MARK_START.equals(serialized)) {
       values = null;
       return;
     }
     final SortField[] sortFields = sortSpec.getSort().getSort();
     final List<SchemaField> schemaFields = sortSpec.getSchemaFields();

     final List<Object> pieces = decodeTotem(serialized);
     assert null != pieces : "pieces wasn't parsed, nor exception thrown?";

     if (sortFields.length != pieces.size()) {
       throw new SolrException(ErrorCode.BAD_REQUEST,
                               CURSOR_MARK_PARAM+" does not work with current sort (wrong size): " + serialized);
     }

     // Build into a local list so that a failure part way through the loop
     // can not leave this cursor holding a truncated set of sort values.
     final List<Object> parsed = new ArrayList<>(sortFields.length);
     for (int i = 0; i < sortFields.length; i++) {
       final SchemaField curField = schemaFields.get(i);
       Object rawValue = pieces.get(i);

       // A null entry has no FieldType to convert with, so the decoded value is
       // used as is (presumably sort clauses not backed by a schema field -- TODO confirm).
       if (null != curField) {
         rawValue = curField.getType().unmarshalSortValue(rawValue);
       }

       parsed.add(rawValue);
     }
     this.values = parsed;
   }

   /**
    * Decodes the Base64 + javabin representation of a totem into the list of
    * (still marshalled) sort values it contains.
    *
    * @param serialized the totem string supplied by the client
    * @return the decoded values, in sort clause order
    * @throws SolrException with {@link ErrorCode#BAD_REQUEST} wrapping any failure to decode
    */
   @SuppressWarnings({"unchecked"})
   private static List<Object> decodeTotem(final String serialized) {
     try {
       final byte[] rawData = Base64.base64ToByteArray(serialized);
       try (JavaBinCodec jbc = new JavaBinCodec();
            ByteArrayInputStream in = new ByteArrayInputStream(rawData)) {
         List<Object> pieces = (List<Object>) jbc.unmarshal(in);

         boolean hasStringLikeValue = false;
         for (Object o : pieces) {
           if (o instanceof BytesRefBuilder || o instanceof BytesRef || o instanceof String) {
             hasStringLikeValue = true;
             break;
           }
         }

         if (hasStringLikeValue) {
           // Rewind to the start of the buffer and decode a second time with a fresh codec.
           // NOTE(review): kept exactly as the original behaved; the reason the first
           // decode is not trusted for string-like values is not visible here -- confirm.
           in.reset();
           try (JavaBinCodec freshCodec = new JavaBinCodec()) {
             pieces = (List<Object>) freshCodec.unmarshal(in);
           }
         }
         return pieces;
       }
     } catch (Exception ex) {
       throw new SolrException(ErrorCode.BAD_REQUEST,
                               "Unable to parse '"+CURSOR_MARK_PARAM+"' after totem: " +
                               "value must either be '"+CURSOR_MARK_START+"' or the " +
                               "'"+CURSOR_MARK_NEXT+"' returned by a previous search: "
                               + serialized, ex);
     }
   }

   /**
    * Generates a Base64 encoded serialized representation of the sort values
    * encapsulated by this object, for use in cursor requests.
    *
    * @return the "start" marker if this object has no sort values, otherwise the
    *         Base64 encoded javabin form of the marshalled sort values
    * @throws SolrException with {@link ErrorCode#SERVER_ERROR} if the values can not be encoded
    * @see #parseSerializedTotem
    */
   public String getSerializedTotem() {
     if (null == this.values) {
       return CURSOR_MARK_START;
     }

     final List<SchemaField> schemaFields = sortSpec.getSchemaFields();
     final List<Object> marshalledValues = new ArrayList<>(values.size());
     for (int i = 0; i < schemaFields.size(); i++) {
       final SchemaField fld = schemaFields.get(i);
       Object safeValue = values.get(i);
       // entries without a SchemaField are written out unchanged
       if (null != fld) {
         final FieldType type = fld.getType();
         safeValue = type.marshalSortValue(safeValue);
       }
       marshalledValues.add(safeValue);
     }

     // TODO: we could also encode info about the SortSpec for error checking:
     // the type/name/dir from the SortFields (or a hashCode to act as a checksum)
     // could help provide more validation beyond just the number of clauses.

     try (JavaBinCodec jbc = new JavaBinCodec(); ByteArrayOutputStream out = new ByteArrayOutputStream(256)) {
       jbc.marshal(marshalledValues, out);
       final byte[] rawData = out.toByteArray();
       return Base64.byteArrayToBase64(rawData, 0, rawData.length);
     } catch (Exception ex) {
       throw new SolrException(ErrorCode.SERVER_ERROR,
                               "Unable to format search after totem", ex);
     }
   }

   /**
    * Returns a synthetically constructed {@link FieldDoc} whose {@link FieldDoc#fields}
    * match the values of this object.
    * <p>
    * Important Notes:
    * </p>
    * <ul>
    *  <li>{@link FieldDoc#doc} will always be set to {@link Integer#MAX_VALUE} so
    *    that the tie breaking logic used by <code>IndexSearcher</code> won't select
    *    the same doc again based on the internal lucene docId when the Solr
    *    <code>uniqueKey</code> value is the same.
    *  </li>
    *  <li>{@link FieldDoc#score} will always be set to 0.0F since it is not used
    *    when applying <code>searchAfter</code> logic. (Even if the sort values themselves
    *    contain scores which are used in the sort)
    *  </li>
    * </ul>
    *
    * @return a {@link FieldDoc} to "search after" or null if the initial
    *         page of results is requested.
    */
   public FieldDoc getSearchAfterFieldDoc() {
     if (null == values) {
       return null;
     }

     return new FieldDoc(Integer.MAX_VALUE, 0.0F, values.toArray());
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.solr.search;

	import org.apache.lucene.search.FieldDoc;
	import org.apache.lucene.search.Sort;
	import org.apache.lucene.search.SortField;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.BytesRefBuilder;
	import org.apache.solr.common.SolrException;
	import org.apache.solr.common.SolrException.ErrorCode;

	import static org.apache.solr.common.params.CursorMarkParams.*;

	import org.apache.solr.common.util.Base64;
	import org.apache.solr.common.util.JavaBinCodec;
	import org.apache.solr.schema.IndexSchema;
	import org.apache.solr.schema.FieldType;
	import org.apache.solr.schema.SchemaField;

	import java.util.List;
	import java.util.ArrayList;
	import java.io.ByteArrayOutputStream;
	import java.io.ByteArrayInputStream;

	/**
	* An object that encapsulates the basic information about the current Mark Point of a
	* "Cursor" based request. <code>CursorMark</code> objects track the sort values of
	* the last document returned to a user, so that {@link SolrIndexSearcher} can then
	* be asked to find all documents "after" the values represented by this
	* <code>CursorMark</code>.
	*
	*/
	public final class CursorMark {

	/**
	* Used for validation and (un)marshalling of sort values
	*/
	private final SortSpec sortSpec;

	/**
	* The raw, unmarshalled, sort values (that corrispond with the SortField's in the
	* SortSpec) for knowing which docs this cursor should "search after". If this
	* list is null, then we have no specific values to "search after" and we
	* should start from the very beginning of the sorted list of documents matching
	* the query.
	*/
	private List<Object> values = null;

	/**
	* Generates an empty CursorMark bound for use with the
	* specified schema and {@link SortSpec}.
	*
	* @param schema used for basic validation
	* @param sortSpec bound to this totem (un)marshalling serialized values
	*/
	public CursorMark(IndexSchema schema, SortSpec sortSpec) {

	final SchemaField uniqueKey = schema.getUniqueKeyField();
	if (null == uniqueKey) {
	throw new SolrException(ErrorCode.BAD_REQUEST,
	"Cursor functionality is not available unless the IndexSchema defines a uniqueKey field");
	}

	final Sort sort = sortSpec.getSort();
	if (null == sort) {
	// pure score, by definition we don't include the mandatyr uniqueKey tie breaker
	throw new SolrException(ErrorCode.BAD_REQUEST,
	"Cursor functionality requires a sort containing a uniqueKey field tie breaker");
	}

	if (!sortSpec.getSchemaFields().contains(uniqueKey)) {
	throw new SolrException(ErrorCode.BAD_REQUEST,
	"Cursor functionality requires a sort containing a uniqueKey field tie breaker");
	}

	if (0 != sortSpec.getOffset()) {
	throw new SolrException(ErrorCode.BAD_REQUEST,
	"Cursor functionality requires start=0");
	}

	for (SortField sf : sort.getSort()) {
	if (sf.getType().equals(SortField.Type.DOC)) {
	throw new SolrException(ErrorCode.BAD_REQUEST,
	"Cursor functionality can not be used with internal doc ordering sort: _docid_");
	}
	}

	if (sort.getSort().length != sortSpec.getSchemaFields().size()) {
	throw new SolrException(ErrorCode.SERVER_ERROR,
	"Cursor SortSpec failure: sort length != SchemaFields: "
	+ sort.getSort().length + " != " +
	sortSpec.getSchemaFields().size());
	}

	this.sortSpec = sortSpec;
	this.values = null;
	}

	/**
	* Generates an empty CursorMark bound for use with the same {@link SortSpec}
	* as the specified existing CursorMark.
	*
	* @param previous Existing CursorMark whose SortSpec will be reused in the new CursorMark.
	* @see #createNext
	*/
	private CursorMark(CursorMark previous) {
	this.sortSpec = previous.sortSpec;
	this.values = null;
	}

	/**
	* Generates an new CursorMark bound for use with the same {@link SortSpec}
	* as the current CursorMark but using the new SortValues.
	*
	*/
	public CursorMark createNext(List<Object> nextSortValues) {
	final CursorMark next = new CursorMark(this);
	next.setSortValues(nextSortValues);
	return next;
	}


	/**
	* Sets the (raw, unmarshalled) sort values (which must conform to the existing
	* sortSpec) to populate this object. If null, then there is nothing to
	* "search after" and the "first page" of results should be returned.
	*/
	public void setSortValues(List<Object> input) {
	if (null == input) {
	this.values = null;
	} else {
	if (input.size() != sortSpec.getSort().getSort().length) {
	throw new SolrException(ErrorCode.SERVER_ERROR,
	"Cursor SortSpec failure: sort values != sort length: "
	+ input.size() + " != " + sortSpec.getSort().getSort().length);
	}

	// defensive copy
	this.values = new ArrayList<>(input);
	}
	}

	/**
	* Returns a copy of the (raw, unmarshalled) sort values used by this object, or
	* null if first page of docs should be returned (ie: no sort after)
	*/
	public List<Object> getSortValues() {
	// defensive copy
	return null == this.values ? null : new ArrayList<>(this.values);
	}

	/**
	* Returns the SortSpec used by this object.
	*/
	public SortSpec getSortSpec() {
	return this.sortSpec;
	}

	/**
	* Parses the serialized version of a CursorMark from a client
	* (which must conform to the existing sortSpec) and populates this object.
	*
	* @see #getSerializedTotem
	*/
	@SuppressWarnings({"unchecked"})
	public void parseSerializedTotem(final String serialized) {
	if (CURSOR_MARK_START.equals(serialized)) {
	values = null;
	return;
	}
	final SortField[] sortFields = sortSpec.getSort().getSort();
	final List<SchemaField> schemaFields = sortSpec.getSchemaFields();

	List<Object> pieces = null;
	try {
	final byte[] rawData = Base64.base64ToByteArray(serialized);
	try (JavaBinCodec jbc = new JavaBinCodec(); ByteArrayInputStream in = new ByteArrayInputStream(rawData)){
	pieces = (List<Object>) jbc.unmarshal(in);
	boolean b = false;
	for (Object o : pieces) {
	if (o instanceof BytesRefBuilder \|\| o instanceof BytesRef \|\| o instanceof String) {
	b = true; break;
	}
	}
	if (b) {
	in.reset();
	pieces = (List<Object>) new JavaBinCodec().unmarshal(in);
	}
	}
	} catch (Exception ex) {
	throw new SolrException(ErrorCode.BAD_REQUEST,
	"Unable to parse '"+CURSOR_MARK_PARAM+"' after totem: " +
	"value must either be '"+CURSOR_MARK_START+"' or the " +
	"'"+CURSOR_MARK_NEXT+"' returned by a previous search: "
	+ serialized, ex);
	}
	assert null != pieces : "pieces wasn't parsed, nor exception thrown?";

	if (sortFields.length != pieces.size()) {
	throw new SolrException(ErrorCode.BAD_REQUEST,
	CURSOR_MARK_PARAM+" does not work with current sort (wrong size): " + serialized);
	}


	this.values = new ArrayList<>(sortFields.length);

	final BytesRef tmpBytes = new BytesRef();
	for (int i = 0; i < sortFields.length; i++) {

	SortField curSort = sortFields[i];
	SchemaField curField = schemaFields.get(i);
	Object rawValue = pieces.get(i);

	if (null != curField) {
	FieldType curType = curField.getType();
	rawValue = curType.unmarshalSortValue(rawValue);
	}

	this.values.add(rawValue);
	}
	}

	/**
	* Generates a Base64 encoded serialized representation of the sort values
	* encapsulated by this object, for use in cursor requests.
	*
	* @see #parseSerializedTotem
	*/
	public String getSerializedTotem() {
	if (null == this.values) {
	return CURSOR_MARK_START;
	}

	final List<SchemaField> schemaFields = sortSpec.getSchemaFields();
	final ArrayList<Object> marshalledValues = new ArrayList<>(values.size()+1);
	for (int i = 0; i < schemaFields.size(); i++) {
	SchemaField fld = schemaFields.get(i);
	Object safeValue = values.get(i);
	if (null != fld) {
	FieldType type = fld.getType();
	safeValue = type.marshalSortValue(safeValue);
	}
	marshalledValues.add(safeValue);
	}

	// TODO: we could also encode info about the SortSpec for error checking:
	// the type/name/dir from the SortFields (or a hashCode to act as a checksum)
	// could help provide more validation beyond just the number of clauses.

	try (JavaBinCodec jbc = new JavaBinCodec(); ByteArrayOutputStream out = new ByteArrayOutputStream(256)) {
	jbc.marshal(marshalledValues, out);
	byte[] rawData = out.toByteArray();
	return Base64.byteArrayToBase64(rawData, 0, rawData.length);
	} catch (Exception ex) {
	throw new SolrException(ErrorCode.SERVER_ERROR,
	"Unable to format search after totem", ex);
	}
	}

	/**
	* Returns a synthetically constructed {@link FieldDoc} whose {@link FieldDoc#fields}
	* match the values of this object.
	* <p>
	* Important Notes:
	* </p>
	* <ul>
	* <li>{@link FieldDoc#doc} will always be set to {@link Integer#MAX_VALUE} so
	* that the tie breaking logic used by <code>IndexSearcher</code> won't select
	* the same doc again based on the internal lucene docId when the Solr
	* <code>uniqueKey</code> value is the same.
	* </li>
	* <li>{@link FieldDoc#score} will always be set to 0.0F since it is not used
	* when applying <code>searchAfter</code> logic. (Even if the sort values themselves
	* contain scores which are used in the sort)
	* </li>
	* </ul>
	*
	* @return a {@link FieldDoc} to "search after" or null if the initial
	* page of results is requested.
	*/
	public FieldDoc getSearchAfterFieldDoc() {
	if (null == values) return null;

	return new FieldDoc(Integer.MAX_VALUE, 0.0F, values.toArray());
	}

	}