lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/TermBytes.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.lucene.codecs.uniformsplit;

 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;

 /**
  * Term of a block line.
  * <p>
  * Contains the term bytes and the minimal distinguishing prefix (MDP) length
  * of this term.
  * <p>
  * The MDP is the minimal prefix that distinguishes a term from its immediate
  * previous term (terms are alphabetically sorted).
  * <p>
  * The incremental encoding suffix is the suffix starting at the last byte of
  * the MDP (inclusive).
  * <p>
  * Example:
  * For the block
  * <pre>
  * client
  * color
  * companies
  * company
  * </pre>
  * "color" - MDP is "co" - incremental encoding suffix is "olor".
  * <br>
  * "companies" - MDP is "com" - incremental encoding suffix is "mpanies".
  * <br>
  * "company" - MDP is "company" - incremental encoding suffix is "y".
  *
  * @lucene.experimental
  */
 public class TermBytes implements Accountable {

   /** Size of a bare instance as reported by {@link RamUsageEstimator#shallowSizeOfInstance(Class)}. */
   private static final long BASE_RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(TermBytes.class);

   /** Length of the minimal distinguishing prefix (MDP) of {@link #term}. */
   protected int mdpLength;

   /** Bytes of the term; {@link #reset(int, BytesRef)} asserts that its offset is 0. */
   protected BytesRef term;

   /**
    * Creates a term by delegating to {@link #reset(int, BytesRef)}.
    *
    * @param mdpLength Length of the MDP of the term.
    * @param term      Bytes of the term.
    */
   public TermBytes(int mdpLength, BytesRef term) {
     reset(mdpLength, term);
   }

   /**
    * Replaces the content of this instance.
    * <p>
    * When assertions are enabled, the following is checked before any field
    * is modified: an empty term goes with an MDP length of 0, a non-empty
    * term goes with a strictly positive MDP length no larger than the term
    * length, and the term offset is 0.
    *
    * @param mdpLength Length of the MDP of the term.
    * @param term      Bytes of the term; the reference is stored, not copied.
    * @return This instance.
    */
   public TermBytes reset(int mdpLength, BytesRef term) {
     // The term is dereferenced only inside the assert statements, so nothing
     // is evaluated when assertions are disabled.
     assert (term.length > 0 && mdpLength > 0) || (term.length == 0 && mdpLength == 0)
         : "Inconsistent mdpLength=" + mdpLength + ", term.length=" + term.length;
     assert term.length == 0 || mdpLength <= term.length
         : "Too large mdpLength=" + mdpLength + ", term.length=" + term.length;
     assert term.offset == 0;
     this.term = term;
     this.mdpLength = mdpLength;
     return this;
   }

   /**
    * @return The length of the MDP of this term.
    * @see TermBytes
    */
   public int getMdpLength() {
     return mdpLength;
   }

   /**
    * @return The bytes of this term (the stored reference, not a copy).
    */
   public BytesRef getTerm() {
     return term;
   }

   /**
    * @return The offset at which the incremental encoding suffix of this
    * term starts, that is the index of the last byte of the MDP, or 0 when
    * the MDP length is 0.
    * @see TermBytes
    */
   public int getSuffixOffset() {
     final int lastMdpByteIndex = mdpLength - 1;
     return lastMdpByteIndex < 0 ? 0 : lastMdpByteIndex;
   }

   /**
    * @return The number of bytes in the incremental encoding suffix of this
    * term, that is the term length minus {@link #getSuffixOffset()}.
    * @see TermBytes
    */
   public int getSuffixLength() {
     final int termLength = term.length;
     final int suffixOffset = getSuffixOffset();
     return termLength - suffixOffset;
   }

   /**
    * Determines how many leading bytes of a term are needed to tell it apart
    * from the term that immediately precedes it (terms are alphabetically
    * sorted). This is the minimal distinguishing prefix (MDP) length.
    * <p>
    * Example: with previous="car" and current="cartridge" the result is 4,
    * the length of "cart", which is the shortest prefix of "cartridge" that
    * differs from "car".
    * <p>
    * Without a previous term the candidate length is 1. In all cases the
    * result is capped by the length of the current term.
    *
    * @param previousTerm The preceding term, or null if there is none.
    * @param currentTerm  The term to compute the MDP length of.
    * @return The MDP length, never greater than {@code currentTerm.length}.
    * @see TermBytes
    */
   public static int computeMdpLength(BytesRef previousTerm, BytesRef currentTerm) {
     final int candidateLength;
     if (previousTerm == null) {
       candidateLength = 1;
     } else {
       candidateLength = StringHelper.sortKeyLength(previousTerm, currentTerm);
     }
     final int currentLength = currentTerm.length;
     return candidateLength <= currentLength ? candidateLength : currentLength;
   }

   /**
    * @return The base size of an instance plus the value computed by
    * {@code RamUsageUtil.ramBytesUsed} for the term.
    */
   @Override
   public long ramBytesUsed() {
     final long termRamUsage = RamUsageUtil.ramBytesUsed(term);
     return BASE_RAM_USAGE + termRamUsage;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.lucene.codecs.uniformsplit;

	import org.apache.lucene.util.Accountable;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.RamUsageEstimator;
	import org.apache.lucene.util.StringHelper;

	/**
	* Term of a block line.
	* <p>
	* Contains the term bytes and the minimal distinguishing prefix (MDP) length
	* of this term.
	* <p>
	* The MDP is the minimal prefix that distinguishes a term from its immediate
	* previous term (terms are alphabetically sorted).
	* <p>
	* The incremental encoding suffix is the suffix starting at the last byte of
	* the MDP (inclusive).
	* <p>
	* Example:
	* For the block
	* <pre>
	* client
	* color
	* companies
	* company
	* </pre>
	* "color" - MDP is "co" - incremental encoding suffix is "olor".
	* <br>
	* "companies" - MDP is "com" - incremental encoding suffix is "mpanies".
	* <br>
	* "company" - MDP is "company" - incremental encoding suffix is "y".
	*
	* @lucene.experimental
	*/
	public class TermBytes implements Accountable {

	private static final long BASE_RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(TermBytes.class);

	protected int mdpLength;
	protected BytesRef term;

	public TermBytes(int mdpLength, BytesRef term) {
	reset(mdpLength, term);
	}

	public TermBytes reset(int mdpLength, BytesRef term) {
	assert term.length > 0 && mdpLength > 0 \|\| term.length == 0 && mdpLength == 0 : "Inconsistent mdpLength=" + mdpLength + ", term.length=" + term.length;
	assert term.length == 0 \|\| mdpLength <= term.length : "Too large mdpLength=" + mdpLength + ", term.length=" + term.length;
	assert term.offset == 0;
	this.mdpLength = mdpLength;
	this.term = term;
	return this;
	}

	/**
	* @return This term MDP length.
	* @see TermBytes
	*/
	public int getMdpLength() {
	return mdpLength;
	}

	/**
	* @return This term bytes.
	*/
	public BytesRef getTerm() {
	return term;
	}

	/**
	* @return The offset of this term incremental encoding suffix.
	* @see TermBytes
	*/
	public int getSuffixOffset() {
	return Math.max(mdpLength - 1, 0);
	}

	/**
	* @return The length of this term incremental encoding suffix.
	* @see TermBytes
	*/
	public int getSuffixLength() {
	return term.length - getSuffixOffset();
	}

	/**
	* Computes the length of the minimal distinguishing prefix (MDP) between
	* a current term and its previous term (terms are alphabetically sorted).
	* <p>
	* Example: If previous="car" and current="cartridge", then MDP length is
	* 4. It is the length of the minimal prefix distinguishing "cartridge" from
	* "car", that is, the length of "cart".
	*
	* @see TermBytes
	*/
	public static int computeMdpLength(BytesRef previousTerm, BytesRef currentTerm) {
	int mdpLength = previousTerm == null ? 1 : StringHelper.sortKeyLength(previousTerm, currentTerm);
	return Math.min(mdpLength, currentTerm.length);
	}

	@Override
	public long ramBytesUsed() {
	return BASE_RAM_USAGE + RamUsageUtil.ramBytesUsed(term);
	}
	}