solr/core/src/java/org/apache/solr/spelling/Token.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.solr.spelling;


 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
 import org.apache.lucene.util.BytesRef;

 /**
  A Token is an occurrence of a term from the text of a field.  It consists of
  a term's text, the start and end offset of the term in the text of the field,
  and a type string.
  <p>
  The start and end offsets permit applications to re-associate a token with
  its source text, e.g., to display highlighted query terms in a document
  browser, or to show matching text fragments in a <a href="http://en.wikipedia.org/wiki/Key_Word_in_Context">KWIC</a>
  display, etc.
  <p>
  The type is a string, assigned by a lexical analyzer
  (a.k.a. tokenizer), naming the lexical or syntactic class that the token
  belongs to.  For example an end of sentence marker token might be implemented
  with type "eos".  The default token type is "word".
  <p>
  A Token can optionally have metadata (a.k.a. payload) in the form of a variable
  length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
  payloads from the index.

  A few things to note:
  <ul>
  <li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
  <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
  <li>The startOffset and endOffset represent the start and end offsets in the source text, so be careful in adjusting them.</li>
  <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
  </ul>
  */
 @Deprecated
 public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {

   // TODO Refactor the spellchecker API to use TokenStreams properly, rather than this hack

   /** Flag bits exposed through {@link FlagsAttribute}; reset to 0 by {@link #clear()}. */
   private int flags;

   /** Optional payload exposed through {@link PayloadAttribute}; {@code null} when absent. */
   private BytesRef payload;

   /** Creates a Token without setting any term text, offsets, flags or payload. */
   public Token() {
   }

   /**
    * Creates a Token by appending the given term text and then setting its offsets.
    * The type defaults to "word".
    * <b>NOTE:</b> for better indexing speed you should instead use the
    * char[] termBuffer methods to set the term text.
    *
    * @param text term text, passed to {@code append}
    * @param start start offset in the source text
    * @param end end offset in the source text
    */
   public Token(CharSequence text, int start, int end) {
     append(text);
     setOffset(start, end);
   }

   /**
    * {@inheritDoc}
    *
    * @return the flags currently stored on this token
    * @see FlagsAttribute
    */
   @Override
   public int getFlags() {
     return this.flags;
   }

   /**
    * {@inheritDoc}
    *
    * @param flags the new flag bits to store on this token
    * @see FlagsAttribute
    */
   @Override
   public void setFlags(int flags) {
     this.flags = flags;
   }

   /**
    * {@inheritDoc}
    *
    * @return the stored payload reference (not a copy), or {@code null} if none is set
    * @see PayloadAttribute
    */
   @Override
   public BytesRef getPayload() {
     return payload;
   }

   /**
    * {@inheritDoc}
    *
    * @param payload the payload to store; the reference is kept as-is, {@code null} clears it
    * @see PayloadAttribute
    */
   @Override
   public void setPayload(BytesRef payload) {
     this.payload = payload;
   }

   /**
    * Resets this token to its defaults: the superclass clears the inherited state
    * (term text, positionIncrement, positionLength, startOffset, endOffset and type),
    * after which the payload is dropped and the flags are zeroed.
    */
   @Override
   public void clear() {
     super.clear();
     payload = null;
     flags = 0;
   }

   /**
    * Two Tokens are equal when their flags match, their payloads are both {@code null}
    * or equal, and the superclass also considers them equal.
    *
    * @param obj the object to compare against
    * @return {@code true} if {@code obj} is a Token with identical state
    */
   @Override
   public boolean equals(Object obj) {
     if (this == obj) {
       return true;
     }
     if (!(obj instanceof Token)) {
       return false;
     }

     final Token that = (Token) obj;
     if (flags != that.flags) {
       return false;
     }
     final boolean samePayload =
         (payload == null) ? that.payload == null : payload.equals(that.payload);
     return samePayload && super.equals(obj);
   }

   /**
    * Combines the superclass hash with the flags and, when present, the payload hash,
    * so that it stays consistent with {@link #equals(Object)}.
    */
   @Override
   public int hashCode() {
     final int withFlags = 31 * super.hashCode() + flags;
     return (payload == null) ? withFlags : 31 * withFlags + payload.hashCode();
   }

   /**
    * Clones this token; a non-null payload is deep-copied so the clone does not
    * share the payload instance with this token.
    *
    * @return an independent copy of this token
    */
   @Override
   public Token clone() {
     final Token copy = (Token) super.clone();
     copy.payload = (payload == null) ? null : BytesRef.deepCopyOf(payload);
     return copy;
   }

   /**
    * Copies this token's state into {@code target}: the superclass copies the inherited
    * attributes first, then the flags and a deep copy of the payload (or {@code null})
    * are written through the target's {@link FlagsAttribute} and {@link PayloadAttribute} views.
    *
    * @param target the attribute implementation to copy into; it is cast to
    *               {@link FlagsAttribute} and {@link PayloadAttribute}
    */
   @Override
   public void copyTo(AttributeImpl target) {
     super.copyTo(target);

     final FlagsAttribute flagsTarget = (FlagsAttribute) target;
     flagsTarget.setFlags(flags);

     final PayloadAttribute payloadTarget = (PayloadAttribute) target;
     payloadTarget.setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
   }

   /**
    * Reports the inherited attributes via the superclass, then the {@code "flags"} and
    * {@code "payload"} values of this token.
    *
    * @param reflector the reflector receiving each attribute key and value
    */
   @Override
   public void reflectWith(AttributeReflector reflector) {
     super.reflectWith(reflector);
     reflector.reflect(FlagsAttribute.class, "flags", this.flags);
     reflector.reflect(PayloadAttribute.class, "payload", this.payload);
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.solr.spelling;


	import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
	import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
	import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
	import org.apache.lucene.util.AttributeImpl;
	import org.apache.lucene.util.AttributeReflector;
	import org.apache.lucene.util.BytesRef;

	/**
	A Token is an occurrence of a term from the text of a field. It consists of
	a term's text, the start and end offset of the term in the text of the field,
	and a type string.
	<p>
	The start and end offsets permit applications to re-associate a token with
	its source text, e.g., to display highlighted query terms in a document
	browser, or to show matching text fragments in a <a href="http://en.wikipedia.org/wiki/Key_Word_in_Context">KWIC</a>
	display, etc.
	<p>
	The type is a string, assigned by a lexical analyzer
	(a.k.a. tokenizer), naming the lexical or syntactic class that the token
	belongs to. For example an end of sentence marker token might be implemented
	with type "eos". The default token type is "word".
	<p>
	A Token can optionally have metadata (a.k.a. payload) in the form of a variable
	length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
	payloads from the index.

	A few things to note:
	<ul>
	<li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
	<li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
	<li>The startOffset and endOffset represent the start and end offsets in the source text, so be careful in adjusting them.</li>
	<li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
	</ul>
	*/
	@Deprecated
	public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {

	  // TODO Refactor the spellchecker API to use TokenStreams properly, rather than this hack

	  /** Flag bits exposed through {@link FlagsAttribute}; reset to 0 by {@link #clear()}. */
	  private int flags;

	  /** Optional payload exposed through {@link PayloadAttribute}; {@code null} when absent. */
	  private BytesRef payload;

	  /** Creates a Token without setting any term text, offsets, flags or payload. */
	  public Token() {
	  }

	  /**
	   * Creates a Token by appending the given term text and then setting its offsets.
	   * The type defaults to "word".
	   * <b>NOTE:</b> for better indexing speed you should instead use the
	   * char[] termBuffer methods to set the term text.
	   *
	   * @param text term text, passed to {@code append}
	   * @param start start offset in the source text
	   * @param end end offset in the source text
	   */
	  public Token(CharSequence text, int start, int end) {
	    append(text);
	    setOffset(start, end);
	  }

	  /**
	   * {@inheritDoc}
	   *
	   * @return the flags currently stored on this token
	   * @see FlagsAttribute
	   */
	  @Override
	  public int getFlags() {
	    return this.flags;
	  }

	  /**
	   * {@inheritDoc}
	   *
	   * @param flags the new flag bits to store on this token
	   * @see FlagsAttribute
	   */
	  @Override
	  public void setFlags(int flags) {
	    this.flags = flags;
	  }

	  /**
	   * {@inheritDoc}
	   *
	   * @return the stored payload reference (not a copy), or {@code null} if none is set
	   * @see PayloadAttribute
	   */
	  @Override
	  public BytesRef getPayload() {
	    return payload;
	  }

	  /**
	   * {@inheritDoc}
	   *
	   * @param payload the payload to store; the reference is kept as-is, {@code null} clears it
	   * @see PayloadAttribute
	   */
	  @Override
	  public void setPayload(BytesRef payload) {
	    this.payload = payload;
	  }

	  /**
	   * Resets this token to its defaults: the superclass clears the inherited state
	   * (term text, positionIncrement, positionLength, startOffset, endOffset and type),
	   * after which the payload is dropped and the flags are zeroed.
	   */
	  @Override
	  public void clear() {
	    super.clear();
	    payload = null;
	    flags = 0;
	  }

	  /**
	   * Two Tokens are equal when their flags match, their payloads are both {@code null}
	   * or equal, and the superclass also considers them equal.
	   *
	   * @param obj the object to compare against
	   * @return {@code true} if {@code obj} is a Token with identical state
	   */
	  @Override
	  public boolean equals(Object obj) {
	    if (this == obj) {
	      return true;
	    }
	    if (!(obj instanceof Token)) {
	      return false;
	    }

	    final Token that = (Token) obj;
	    if (flags != that.flags) {
	      return false;
	    }
	    final boolean samePayload =
	        (payload == null) ? that.payload == null : payload.equals(that.payload);
	    return samePayload && super.equals(obj);
	  }

	  /**
	   * Combines the superclass hash with the flags and, when present, the payload hash,
	   * so that it stays consistent with {@link #equals(Object)}.
	   */
	  @Override
	  public int hashCode() {
	    final int withFlags = 31 * super.hashCode() + flags;
	    return (payload == null) ? withFlags : 31 * withFlags + payload.hashCode();
	  }

	  /**
	   * Clones this token; a non-null payload is deep-copied so the clone does not
	   * share the payload instance with this token.
	   *
	   * @return an independent copy of this token
	   */
	  @Override
	  public Token clone() {
	    final Token copy = (Token) super.clone();
	    copy.payload = (payload == null) ? null : BytesRef.deepCopyOf(payload);
	    return copy;
	  }

	  /**
	   * Copies this token's state into {@code target}: the superclass copies the inherited
	   * attributes first, then the flags and a deep copy of the payload (or {@code null})
	   * are written through the target's {@link FlagsAttribute} and {@link PayloadAttribute} views.
	   *
	   * @param target the attribute implementation to copy into; it is cast to
	   *               {@link FlagsAttribute} and {@link PayloadAttribute}
	   */
	  @Override
	  public void copyTo(AttributeImpl target) {
	    super.copyTo(target);

	    final FlagsAttribute flagsTarget = (FlagsAttribute) target;
	    flagsTarget.setFlags(flags);

	    final PayloadAttribute payloadTarget = (PayloadAttribute) target;
	    payloadTarget.setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
	  }

	  /**
	   * Reports the inherited attributes via the superclass, then the {@code "flags"} and
	   * {@code "payload"} values of this token.
	   *
	   * @param reflector the reflector receiving each attribute key and value
	   */
	  @Override
	  public void reflectWith(AttributeReflector reflector) {
	    super.reflectWith(reflector);
	    reflector.reflect(FlagsAttribute.class, "flags", this.flags);
	    reflector.reflect(PayloadAttribute.class, "payload", this.payload);
	  }

	}