lucene/core/src/java/org/apache/lucene/index/Term.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.index;


 import java.nio.ByteBuffer;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
 import java.nio.charset.StandardCharsets;

 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.RamUsageEstimator;

 /**
   A Term represents a word from text.  This is the unit of search.  It is
   composed of two elements, the text of the word, as a string, and the name of
   the field that the text occurred in.

   Note that terms are not limited to words from text fields; they may also
   represent things like dates, email addresses, urls, etc.  */

 public final class Term implements Comparable<Term>, Accountable {
   // Fixed part of the RAM estimate: shallow size of a Term plus shallow size of a BytesRef.
   private static final long BASE_RAM_BYTES =
       RamUsageEstimator.shallowSizeOfInstance(Term.class)
           + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);

   // Both fields are package-private and non-final because set(String, BytesRef) reassigns them.
   String field;
   BytesRef bytes;

   /**
    * Creates a Term from a field name and a byte sequence.
    *
    * <p>A non-null {@code bytes} is deep-copied; a null {@code bytes} is stored as null.
    * A null field or null bytes value results in undefined behavior for most Lucene APIs
    * that accept a Term parameter.
    *
    * @param fld field's name
    * @param bytes term value, copied when non-null
    */
   public Term(String fld, BytesRef bytes) {
     this.field = fld;
     if (bytes == null) {
       this.bytes = null;
     } else {
       this.bytes = BytesRef.deepCopyOf(bytes);
     }
   }

   /**
    * Creates a Term from a field name and the current content of a builder, obtained via
    * {@code bytesBuilder.toBytesRef()}.
    *
    * <p>A null field value results in undefined behavior for most Lucene APIs that accept
    * a Term parameter.
    *
    * @param fld field's name
    * @param bytesBuilder builder supplying the term bytes
    */
   public Term(String fld, BytesRefBuilder bytesBuilder) {
     this.field = fld;
     this.bytes = bytesBuilder.toBytesRef();
   }

   /**
    * Creates a Term from a field name and text; delegates to
    * {@link #Term(String, BytesRef)} with a BytesRef built from {@code text}.
    *
    * <p>A null field or null text value results in undefined behavior for most Lucene APIs
    * that accept a Term parameter.
    *
    * @param fld field's name
    * @param text term text
    */
   public Term(String fld, String text) {
     this(fld, new BytesRef(text));
   }

   /**
    * Creates a Term with the given field and empty text. Useful either for reusing a Term
    * with the same field or as a pattern for a query.
    *
    * @param fld field's name
    */
   public Term(String fld) {
     this(fld, new BytesRef());
   }

   /**
    * Returns the field of this term, i.e. the part of a document this term came from.
    */
   public final String field() {
     return this.field;
   }

   /**
    * Returns the text of this term, produced by {@link #toString(BytesRef)} on the term bytes.
    * For words this is the word itself; for dates and other types it is a string encoding
    * of the object.
    */
   public final String text() {
     return Term.toString(this.bytes);
   }

   /**
    * Returns a human-readable form of the term text: the strict UTF-8 decoding of the
    * referenced bytes, or {@code termText.toString()} when the bytes are not valid UTF-8.
    */
   public static final String toString(BytesRef termText) {
     // Terms are usually (but not always) text, so attempt a strict decode first:
     // malformed or unmappable input is reported rather than silently replaced.
     final CharsetDecoder strictUtf8 = StandardCharsets.UTF_8.newDecoder();
     strictUtf8.onMalformedInput(CodingErrorAction.REPORT);
     strictUtf8.onUnmappableCharacter(CodingErrorAction.REPORT);
     try {
       final ByteBuffer raw = ByteBuffer.wrap(termText.bytes, termText.offset, termText.length);
       return strictUtf8.decode(raw).toString();
     } catch (CharacterCodingException notUtf8) {
       // Not decodable as UTF-8: fall back to the BytesRef's own string form.
       return termText.toString();
     }
   }

   /** Returns the bytes of this term; callers should not modify them. */
   public final BytesRef bytes() {
     return this.bytes;
   }

   /**
    * Two terms are equal when both their fields and their bytes are equal; a null field or
    * null bytes only matches another null.
    */
   @Override
   public boolean equals(Object obj) {
     if (obj == this) {
       return true;
     }
     // Term is final, so an instanceof test matches exactly the same objects as a
     // getClass() comparison, and it also rejects null.
     if (!(obj instanceof Term)) {
       return false;
     }
     final Term that = (Term) obj;
     final boolean sameField =
         (field == null) ? (that.field == null) : field.equals(that.field);
     if (!sameField) {
       return false;
     }
     return (bytes == null) ? (that.bytes == null) : bytes.equals(that.bytes);
   }

   /** Combines the hash codes of field and bytes (0 for null) with the multiplier 31. */
   @Override
   public int hashCode() {
     final int fieldHash = (field != null) ? field.hashCode() : 0;
     final int bytesHash = (bytes != null) ? bytes.hashCode() : 0;
     return 31 * (31 + fieldHash) + bytesHash;
   }

   /**
    * Orders terms first by field, then by bytes.
    *
    * @return a negative integer if this term belongs before the argument, zero if they are
    *     equal, and a positive integer if this term belongs after the argument
    */
   @Override
   public final int compareTo(Term other) {
     if (!field.equals(other.field)) {
       return field.compareTo(other.field);
     }
     return bytes.compareTo(other.bytes);
   }

   /**
    * Resets the field and text of a Term.
    *
    * <p>WARNING: the provided BytesRef is used directly, without copying. The bytes must
    * therefore not be modified afterwards; for example, clone a copy rather than passing
    * reused bytes from a TermsEnum.
    */
   final void set(String fld, BytesRef bytes) {
     this.field = fld;
     this.bytes = bytes;
   }

   /** Returns the field and the term text joined by a colon. */
   @Override
   public final String toString() {
     return field + ":" + toString(bytes);
   }

   /**
    * Estimates memory use as the fixed base size, plus the size of the field object, plus
    * (when bytes is non-null) the aligned size of the backing byte array with its header.
    */
   @Override
   public long ramBytesUsed() {
     final long fieldSize = RamUsageEstimator.sizeOfObject(field);
     long arraySize = 0L;
     if (bytes != null) {
       arraySize = RamUsageEstimator.alignObjectSize(
           bytes.bytes.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER);
     }
     return BASE_RAM_BYTES + fieldSize + arraySize;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.index;


	import java.nio.ByteBuffer;
	import java.nio.charset.CharacterCodingException;
	import java.nio.charset.CharsetDecoder;
	import java.nio.charset.CodingErrorAction;
	import java.nio.charset.StandardCharsets;

	import org.apache.lucene.util.Accountable;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.BytesRefBuilder;
	import org.apache.lucene.util.RamUsageEstimator;

	/**
	A Term represents a word from text. This is the unit of search. It is
	composed of two elements, the text of the word, as a string, and the name of
	the field that the text occurred in.

	Note that terms are not limited to words from text fields; they may also
	represent things like dates, email addresses, urls, etc. */

	public final class Term implements Comparable<Term>, Accountable {
		// Fixed part of the RAM estimate: shallow size of a Term plus shallow size of a BytesRef.
		private static final long BASE_RAM_BYTES =
				RamUsageEstimator.shallowSizeOfInstance(Term.class)
						+ RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);

		// Both fields are package-private and non-final because set(String, BytesRef) reassigns them.
		String field;
		BytesRef bytes;

		/**
		 * Creates a Term from a field name and a byte sequence.
		 *
		 * <p>A non-null {@code bytes} is deep-copied; a null {@code bytes} is stored as null.
		 * A null field or null bytes value results in undefined behavior for most Lucene APIs
		 * that accept a Term parameter.
		 *
		 * @param fld field's name
		 * @param bytes term value, copied when non-null
		 */
		public Term(String fld, BytesRef bytes) {
			this.field = fld;
			if (bytes == null) {
				this.bytes = null;
			} else {
				this.bytes = BytesRef.deepCopyOf(bytes);
			}
		}

		/**
		 * Creates a Term from a field name and the current content of a builder, obtained via
		 * {@code bytesBuilder.toBytesRef()}.
		 *
		 * <p>A null field value results in undefined behavior for most Lucene APIs that accept
		 * a Term parameter.
		 *
		 * @param fld field's name
		 * @param bytesBuilder builder supplying the term bytes
		 */
		public Term(String fld, BytesRefBuilder bytesBuilder) {
			this.field = fld;
			this.bytes = bytesBuilder.toBytesRef();
		}

		/**
		 * Creates a Term from a field name and text; delegates to
		 * {@link #Term(String, BytesRef)} with a BytesRef built from {@code text}.
		 *
		 * <p>A null field or null text value results in undefined behavior for most Lucene APIs
		 * that accept a Term parameter.
		 *
		 * @param fld field's name
		 * @param text term text
		 */
		public Term(String fld, String text) {
			this(fld, new BytesRef(text));
		}

		/**
		 * Creates a Term with the given field and empty text. Useful either for reusing a Term
		 * with the same field or as a pattern for a query.
		 *
		 * @param fld field's name
		 */
		public Term(String fld) {
			this(fld, new BytesRef());
		}

		/**
		 * Returns the field of this term, i.e. the part of a document this term came from.
		 */
		public final String field() {
			return this.field;
		}

		/**
		 * Returns the text of this term, produced by {@link #toString(BytesRef)} on the term bytes.
		 * For words this is the word itself; for dates and other types it is a string encoding
		 * of the object.
		 */
		public final String text() {
			return Term.toString(this.bytes);
		}

		/**
		 * Returns a human-readable form of the term text: the strict UTF-8 decoding of the
		 * referenced bytes, or {@code termText.toString()} when the bytes are not valid UTF-8.
		 */
		public static final String toString(BytesRef termText) {
			// Terms are usually (but not always) text, so attempt a strict decode first:
			// malformed or unmappable input is reported rather than silently replaced.
			final CharsetDecoder strictUtf8 = StandardCharsets.UTF_8.newDecoder();
			strictUtf8.onMalformedInput(CodingErrorAction.REPORT);
			strictUtf8.onUnmappableCharacter(CodingErrorAction.REPORT);
			try {
				final ByteBuffer raw = ByteBuffer.wrap(termText.bytes, termText.offset, termText.length);
				return strictUtf8.decode(raw).toString();
			} catch (CharacterCodingException notUtf8) {
				// Not decodable as UTF-8: fall back to the BytesRef's own string form.
				return termText.toString();
			}
		}

		/** Returns the bytes of this term; callers should not modify them. */
		public final BytesRef bytes() {
			return this.bytes;
		}

		/**
		 * Two terms are equal when both their fields and their bytes are equal; a null field or
		 * null bytes only matches another null.
		 */
		@Override
		public boolean equals(Object obj) {
			if (obj == this) {
				return true;
			}
			// Term is final, so an instanceof test matches exactly the same objects as a
			// getClass() comparison, and it also rejects null.
			if (!(obj instanceof Term)) {
				return false;
			}
			final Term that = (Term) obj;
			final boolean sameField =
					(field == null) ? (that.field == null) : field.equals(that.field);
			if (!sameField) {
				return false;
			}
			return (bytes == null) ? (that.bytes == null) : bytes.equals(that.bytes);
		}

		/** Combines the hash codes of field and bytes (0 for null) with the multiplier 31. */
		@Override
		public int hashCode() {
			final int fieldHash = (field != null) ? field.hashCode() : 0;
			final int bytesHash = (bytes != null) ? bytes.hashCode() : 0;
			return 31 * (31 + fieldHash) + bytesHash;
		}

		/**
		 * Orders terms first by field, then by bytes.
		 *
		 * @return a negative integer if this term belongs before the argument, zero if they are
		 *     equal, and a positive integer if this term belongs after the argument
		 */
		@Override
		public final int compareTo(Term other) {
			if (!field.equals(other.field)) {
				return field.compareTo(other.field);
			}
			return bytes.compareTo(other.bytes);
		}

		/**
		 * Resets the field and text of a Term.
		 *
		 * <p>WARNING: the provided BytesRef is used directly, without copying. The bytes must
		 * therefore not be modified afterwards; for example, clone a copy rather than passing
		 * reused bytes from a TermsEnum.
		 */
		final void set(String fld, BytesRef bytes) {
			this.field = fld;
			this.bytes = bytes;
		}

		/** Returns the field and the term text joined by a colon. */
		@Override
		public final String toString() {
			return field + ":" + toString(bytes);
		}

		/**
		 * Estimates memory use as the fixed base size, plus the size of the field object, plus
		 * (when bytes is non-null) the aligned size of the backing byte array with its header.
		 */
		@Override
		public long ramBytesUsed() {
			final long fieldSize = RamUsageEstimator.sizeOfObject(field);
			long arraySize = 0L;
			if (bytes != null) {
				arraySize = RamUsageEstimator.alignObjectSize(
						bytes.bytes.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER);
			}
			return BASE_RAM_BYTES + fieldSize + arraySize;
		}
	}