lucene/src/java/org/apache/lucene/document/Field.java - lucene-solr - Git at Google

 package org.apache.lucene.document;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.Reader;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.NumericField;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.StringHelper;

 /**
  * A field is a section of a Document. Each field has two parts, a name and a
  * value. Values may be free text, provided as a String or as a Reader, or they
  * may be atomic keywords, which are not further processed. Such keywords may be
  * used to represent dates, urls, etc. Fields are optionally stored in the
  * index, so that they may be returned with hits on the document.
  */

 public class Field implements IndexableField {

   // index/store options of this field; the option accessors below delegate to it
   protected FieldType type;
   // field name; the default is replaced by every constructor of this class
   protected String name = "body";
   // the data object for all different kinds of field values (String, Reader or byte[])
   protected Object fieldsData = null;
   // pre-analyzed tokenStream for indexed fields
   protected TokenStream tokenStream;
   // true only for fields created through one of the byte[] constructors
   protected boolean isBinary = false;
   // slice (offset/length) of the byte[] held in fieldsData when isBinary is true
   protected int binaryLength;
   protected int binaryOffset;

   protected float boost = 1.0f;

   /**
    * Creates a field that has a name and a type but no value yet. The name is
    * stored exactly as given: it is neither null-checked nor interned.
    */
   public Field(String name, FieldType type) {
     this.name = name;
     this.type = type;
   }

   /**
    * Creates a field whose value is supplied by a Reader.
    *
    * @throws NullPointerException if name or reader is null
    */
   public Field(String name, FieldType type, Reader reader) {
     if (name == null) {
       throw new NullPointerException("name cannot be null");
     }
     if (reader == null) {
       throw new NullPointerException("reader cannot be null");
     }

     this.name = StringHelper.intern(name);        // field names are interned
     this.fieldsData = reader;
     this.type = type;
   }

   /**
    * Creates a field that carries only a pre-analyzed TokenStream; the
    * String, Reader and binary values are all left unset.
    *
    * @throws NullPointerException if name or tokenStream is null
    */
   public Field(String name, FieldType type, TokenStream tokenStream) {
     if (name == null) {
       throw new NullPointerException("name cannot be null");
     }
     if (tokenStream == null) {
       throw new NullPointerException("tokenStream cannot be null");
     }

     this.name = StringHelper.intern(name);        // field names are interned
     this.fieldsData = null;
     this.tokenStream = tokenStream;
     this.type = type;
   }

   /**
    * Creates a binary field whose value is the whole array.
    *
    * @throws NullPointerException if value is null
    */
   public Field(String name, FieldType type, byte[] value) {
     this(name, type, value, 0, value.length);
   }

   /**
    * Creates a binary field whose value is the slice of {@code value} that
    * starts at {@code offset} and spans {@code length} bytes. The array is
    * referenced, not copied, and offset/length are not validated here.
    */
   public Field(String name, FieldType type, byte[] value, int offset, int length) {
     this.isBinary = true;
     this.fieldsData = value;
     this.type = type;
     this.binaryOffset = offset;
     this.binaryLength = length;
     this.name = StringHelper.intern(name);
   }

   /**
    * Creates a String-valued field with an interned name. Equivalent to
    * {@code Field(name, true, type, value)}.
    *
    * @throws IllegalArgumentException under the same conditions as
    *         {@link #Field(String, boolean, FieldType, String)}
    */
   public Field(String name, FieldType type, String value) {
     this(name, true, type, value);
   }

   /**
    * Creates a String-valued field.
    *
    * @param name field name
    * @param internName whether the stored name is passed through
    *        {@code StringHelper.intern}
    * @param type index/store options for the field
    * @param value the String value
    * @throws IllegalArgumentException if name or value is null, if the type is
    *         neither stored nor indexed, or if the type asks for term vectors
    *         without being indexed
    */
   public Field(String name, boolean internName, FieldType type, String value) {
     if (name == null) {
       throw new IllegalArgumentException("name cannot be null");
     }
     if (value == null) {
       throw new IllegalArgumentException("value cannot be null");
     }
     if (!type.stored() && !type.indexed()) {
       throw new IllegalArgumentException("it doesn't make sense to have a field that "
         + "is neither indexed nor stored");
     }
     // The message promises rejection for every non-indexed type, so
     // tokenized() must not take part in this check.
     if (!type.indexed() && type.storeTermVectors()) {
       throw new IllegalArgumentException("cannot store term vector information "
           + "for a field that is not indexed");
     }

     this.type = type;
     // Store the interned instance in the field itself; reassigning only the
     // constructor parameter would leave this.name un-interned.
     this.name = internName ? StringHelper.intern(name) : name;
     this.fieldsData = value;
   }

   /** Always false in this class. */
   public boolean isNumeric() {
     return false;
   }

   /**
    * The value of the field as a String, or null when the current value is
    * not a String (for example a Reader or a byte[]).
    */
   public String stringValue() {
     return fieldsData instanceof String ? (String) fieldsData : null;
   }

   /**
    * The value of the field as a Reader, or null when the current value is
    * not a Reader (for example a String or a byte[]).
    */
   public Reader readerValue() {
     return fieldsData instanceof Reader ? (Reader) fieldsData : null;
   }

   /**
    * The TokenStream for this field to be used when indexing, or null. If null,
    * the Reader value or String value is analyzed to produce the indexed tokens.
    */
   public TokenStream tokenStreamValue() {
     return tokenStream;
   }

   /**
    * <p>
    * Expert: change the value of this field. This can be used during indexing to
    * re-use a single Field instance to improve indexing speed by avoiding GC
    * cost of new'ing and reclaiming Field instances. Typically a single
    * {@link Document} instance is re-used as well. This helps most on small
    * documents.
    * </p>
    *
    * <p>
    * Each Field instance should only be used once within a single
    * {@link Document} instance. See <a
    * href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed"
    * >ImproveIndexingSpeed</a> for details.
    * </p>
    *
    * @throws IllegalArgumentException if this is a binary field
    */
   public void setValue(String value) {
     if (isBinary) {
       throw new IllegalArgumentException(
           "cannot set a String value on a binary field");
     }
     fieldsData = value;
   }

   /**
    * Expert: change the value of this field. See <a
    * href="#setValue(java.lang.String)">setValue(String)</a>.
    *
    * @throws IllegalArgumentException if this is a binary field or a stored
    *         field
    */
   public void setValue(Reader value) {
     if (isBinary) {
       throw new IllegalArgumentException(
           "cannot set a Reader value on a binary field");
     }
     if (stored()) {
       throw new IllegalArgumentException(
           "cannot set a Reader value on a stored field");
     }
     fieldsData = value;
   }

   /**
    * Expert: change the value of this field to the whole array. See <a
    * href="#setValue(java.lang.String)">setValue(String)</a>.
    *
    * @throws IllegalArgumentException if this is not a binary field
    */
   public void setValue(byte[] value) {
     if (!isBinary) {
       throw new IllegalArgumentException(
           "cannot set a byte[] value on a non-binary field");
     }
     fieldsData = value;
     binaryLength = value.length;
     binaryOffset = 0;
   }

   /**
    * Expert: change the value of this field to a slice of the array. See <a
    * href="#setValue(java.lang.String)">setValue(String)</a>.
    *
    * @throws IllegalArgumentException if this is not a binary field
    */
   public void setValue(byte[] value, int offset, int length) {
     if (!isBinary) {
       throw new IllegalArgumentException(
           "cannot set a byte[] value on a non-binary field");
     }
     fieldsData = value;
     binaryLength = length;
     binaryOffset = offset;
   }

   /**
    * Expert: sets the token stream to be used for indexing. The field's type
    * must already be both indexed and tokenized. May be combined with stored
    * values from stringValue() or binaryValue(BytesRef).
    *
    * @throws IllegalArgumentException if the type is not indexed or not
    *         tokenized
    */
   public void setTokenStream(TokenStream tokenStream) {
     if (!indexed() || !tokenized()) {
       throw new IllegalArgumentException(
           "cannot set token stream on non indexed and tokenized field");
     }
     this.tokenStream = tokenStream;
   }

   /** Returns the name of this field. */
   public String name() {
     return name;
   }

   /** Returns the boost of this field; 1.0f unless changed via {@link #setBoost(float)}. */
   public float boost() {
     return boost;
   }

   /**
    * Sets the boost factor for hits on this field. This value will be multiplied
    * into the score of all hits on this field of this document.
    *
    * <p>
    * Boost is used to compute the norm factor for the field. By default, in the
    * {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)}
    * method, the boost value is multiplied by the length normalization factor
    * and then rounded by
    * {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before
    * it is stored in the index. One should attempt to ensure that this product
    * does not overflow the range of that encoding.
    *
    * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
    * @see org.apache.lucene.search.Similarity#encodeNormValue(float)
    */
   public void setBoost(float boost) {
     this.boost = boost;
   }

   /** Always false in this class. */
   public boolean numeric() {
     return false;
   }

   /** Always null in this class. */
   public Number numericValue() {
     return null;
   }

   /** Always null in this class. */
   public NumericField.DataType numericDataType() {
     return null;
   }

   /**
    * Returns the backing byte[] when this field is binary or currently holds
    * a byte[], otherwise null.
    */
   private byte[] getBinaryValue() {
     if (isBinary || fieldsData instanceof byte[]) {
       return (byte[]) fieldsData;
     }
     return null;
   }

   /**
    * Returns the binary value as a BytesRef over the backing array (no copy),
    * or null when there is no binary value.
    *
    * @param reuse if non-null, this instance is filled in and returned instead
    *        of allocating a new BytesRef
    */
   public BytesRef binaryValue(BytesRef reuse) {
     final byte[] bytes = getBinaryValue();
     if (bytes == null) {
       return null;
     }
     if (reuse == null) {
       return new BytesRef(bytes, getBinaryOffset(), getBinaryLength());
     }
     reuse.bytes = bytes;
     reuse.offset = getBinaryOffset();
     reuse.length = getBinaryLength();
     return reuse;
   }

   /**
    * Returns the length of the byte[] segment that is used as value: the
    * recorded length for a binary field, the full array length when a
    * non-binary field happens to hold a byte[], and 0 otherwise.
    *
    * @return length of byte[] segment that represents this Field value
    */
   private int getBinaryLength() {
     if (isBinary) {
       return binaryLength;
     }
     if (fieldsData instanceof byte[]) {
       return ((byte[]) fieldsData).length;
     }
     return 0;
   }

   /**
    * Returns the recorded offset into the byte[] segment that is used as
    * value; it is only assigned by the byte[] constructors and setters.
    *
    * @return index of the first byte in the byte[] segment that represents
    *         this Field value
    */
   public int getBinaryOffset() {
     return binaryOffset;
   }

   /** Returns true if this field was created with a byte[] value. */
   public boolean isBinary() {
     return isBinary;
   }

   // ---- accessors that delegate to the FieldType ----

   public boolean stored() {
     return type.stored();
   }

   public boolean indexed() {
     return type.indexed();
   }

   public boolean tokenized() {
     return type.tokenized();
   }

   public boolean omitNorms() {
     return type.omitNorms();
   }

   public boolean omitTermFreqAndPositions() {
     return type.omitTermFreqAndPositions();
   }

   public boolean storeTermVectors() {
     return type.storeTermVectors();
   }

   public boolean storeTermVectorOffsets() {
     return type.storeTermVectorOffsets();
   }

   public boolean storeTermVectorPositions() {
     return type.storeTermVectorPositions();
   }

   public boolean lazy() {
     return type.lazy();
   }

   /**
    * Prints a Field for human consumption, in the form
    * {@code type<name:value>}. The value is left out when it is null or the
    * type is lazy.
    */
   @Override
   public final String toString() {
     StringBuilder result = new StringBuilder();
     result.append(type.toString());
     result.append('<');
     result.append(name);
     result.append(':');

     if (fieldsData != null && !type.lazy()) {
       result.append(fieldsData);
     }

     result.append('>');
     return result.toString();
   }
 }
	package org.apache.lucene.document;

	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.Reader;

	import org.apache.lucene.analysis.TokenStream;
	import org.apache.lucene.document.NumericField;
	import org.apache.lucene.index.IndexableField;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.StringHelper;

	/**
	* A field is a section of a Document. Each field has two parts, a name and a
	* value. Values may be free text, provided as a String or as a Reader, or they
	* may be atomic keywords, which are not further processed. Such keywords may be
	* used to represent dates, urls, etc. Fields are optionally stored in the
	* index, so that they may be returned with hits on the document.
	*/

	public class Field implements IndexableField {

	protected FieldType type;
	protected String name = "body";
	// the data object for all different kind of field values
	protected Object fieldsData = null;
	// pre-analyzed tokenStream for indexed fields
	protected TokenStream tokenStream;
	protected boolean isBinary = false;
	// length/offset for all primitive types
	protected int binaryLength;
	protected int binaryOffset;

	protected float boost = 1.0f;

	public Field(String name, FieldType type) {
	this.name = name;
	this.type = type;
	}

	public Field(String name, FieldType type, Reader reader) {
	if (name == null)
	throw new NullPointerException("name cannot be null");
	if (reader == null)
	throw new NullPointerException("reader cannot be null");

	this.name = StringHelper.intern(name); // field names are interned
	this.fieldsData = reader;
	this.type = type;
	}

	public Field(String name, FieldType type, TokenStream tokenStream) {
	if (name == null)
	throw new NullPointerException("name cannot be null");
	if (tokenStream == null)
	throw new NullPointerException("tokenStream cannot be null");

	this.name = StringHelper.intern(name); // field names are interned
	this.fieldsData = null;
	this.tokenStream = tokenStream;
	this.type = type;
	}

	public Field(String name, FieldType type, byte[] value) {
	this(name, type, value, 0, value.length);
	}

	public Field(String name, FieldType type, byte[] value, int offset, int length) {
	this.isBinary = true;
	this.fieldsData = value;
	this.type = type;
	this.binaryOffset = offset;
	this.binaryLength = length;
	this.name = StringHelper.intern(name);
	}

	public Field(String name, FieldType type, String value) {
	this(name, true, type, value);
	}

	public Field(String name, boolean internName, FieldType type, String value) {
	if (name == null) {
	throw new IllegalArgumentException("name cannot be null");
	}
	if (value == null) {
	throw new IllegalArgumentException("value cannot be null");
	}
	if (!type.stored() && !type.indexed()) {
	throw new IllegalArgumentException("it doesn't make sense to have a field that "
	+ "is neither indexed nor stored");
	}
	if (!type.indexed() && !type.tokenized() && (type.storeTermVectors())) {
	throw new IllegalArgumentException("cannot store term vector information "
	+ "for a field that is not indexed");
	}

	this.type = type;
	this.name = name;
	this.fieldsData = value;

	if (internName) // field names are optionally interned
	name = StringHelper.intern(name);
	}

	public boolean isNumeric() {
	return false;
	}

	/**
	* The value of the field as a String, or null. If null, the Reader value or
	* binary value is used. Exactly one of stringValue(), readerValue(), and
	* getBinaryValue() must be set.
	*/
	public String stringValue() {
	return fieldsData instanceof String ? (String) fieldsData : null;
	}

	/**
	* The value of the field as a Reader, or null. If null, the String value or
	* binary value is used. Exactly one of stringValue(), readerValue(), and
	* getBinaryValue() must be set.
	*/
	public Reader readerValue() {
	return fieldsData instanceof Reader ? (Reader) fieldsData : null;
	}

	/**
	* The TokesStream for this field to be used when indexing, or null. If null,
	* the Reader value or String value is analyzed to produce the indexed tokens.
	*/
	public TokenStream tokenStreamValue() {
	return tokenStream;
	}

	/**
	* <p>
	* Expert: change the value of this field. This can be used during indexing to
	* re-use a single Field instance to improve indexing speed by avoiding GC
	* cost of new'ing and reclaiming Field instances. Typically a single
	* {@link Document} instance is re-used as well. This helps most on small
	* documents.
	* </p>
	*
	* <p>
	* Each Field instance should only be used once within a single
	* {@link Document} instance. See <a
	* href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed"
	* >ImproveIndexingSpeed</a> for details.
	* </p>
	*/
	public void setValue(String value) {
	if (isBinary) {
	throw new IllegalArgumentException(
	"cannot set a String value on a binary field");
	}
	fieldsData = value;
	}

	/**
	* Expert: change the value of this field. See <a
	* href="#setValue(java.lang.String)">setValue(String)</a>.
	*/
	public void setValue(Reader value) {
	if (isBinary) {
	throw new IllegalArgumentException(
	"cannot set a Reader value on a binary field");
	}
	if (stored()) {
	throw new IllegalArgumentException(
	"cannot set a Reader value on a stored field");
	}
	fieldsData = value;
	}

	/**
	* Expert: change the value of this field. See <a
	* href="#setValue(java.lang.String)">setValue(String)</a>.
	*/
	public void setValue(byte[] value) {
	if (!isBinary) {
	throw new IllegalArgumentException(
	"cannot set a byte[] value on a non-binary field");
	}
	fieldsData = value;
	binaryLength = value.length;
	binaryOffset = 0;
	}

	/**
	* Expert: change the value of this field. See <a
	* href="#setValue(java.lang.String)">setValue(String)</a>.
	*/
	public void setValue(byte[] value, int offset, int length) {
	if (!isBinary) {
	throw new IllegalArgumentException(
	"cannot set a byte[] value on a non-binary field");
	}
	fieldsData = value;
	binaryLength = length;
	binaryOffset = offset;
	}

	/**
	* Expert: sets the token stream to be used for indexing and causes
	* isIndexed() and isTokenized() to return true. May be combined with stored
	* values from stringValue() or getBinaryValue()
	*/
	public void setTokenStream(TokenStream tokenStream) {
	if (!indexed() \|\| !tokenized()) {
	throw new IllegalArgumentException(
	"cannot set token stream on non indexed and tokenized field");
	}
	this.tokenStream = tokenStream;
	}

	public String name() {
	return name;
	}

	public float boost() {
	return boost;
	}

	/**
	* Sets the boost factor hits on this field. This value will be multiplied
	* into the score of all hits on this this field of this document.
	*
	* <p>
	* Boost is used to compute the norm factor for the field. By default, in the
	* {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)}
	* method, the boost value is multiplied by the length normalization factor
	* and then rounded by
	* {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before
	* it is stored in the index. One should attempt to ensure that this product
	* does not overflow the range of that encoding.
	*
	* @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
	* @see org.apache.lucene.search.Similarity#encodeNormValue(float)
	*/
	public void setBoost(float boost) {
	this.boost = boost;
	}

	public boolean numeric() {
	return false;
	}

	public Number numericValue() {
	return null;
	}

	public NumericField.DataType numericDataType() {
	return null;
	}

	private byte[] getBinaryValue(byte[] result /* unused */) {
	if (isBinary \|\| fieldsData instanceof byte[]) return (byte[]) fieldsData;
	else return null;
	}

	private byte[] getBinaryValue() {
	return getBinaryValue(null);
	}

	public BytesRef binaryValue(BytesRef reuse) {
	final byte[] bytes = getBinaryValue();
	if (bytes != null) {
	if (reuse == null) {
	return new BytesRef(bytes, getBinaryOffset(), getBinaryLength());
	} else {
	reuse.bytes = bytes;
	reuse.offset = getBinaryOffset();
	reuse.length = getBinaryLength();
	return reuse;
	}
	} else {
	return null;
	}
	}

	/**
	* Returns length of byte[] segment that is used as value, if Field is not
	* binary returned value is undefined
	*
	* @return length of byte[] segment that represents this Field value
	*/
	private int getBinaryLength() {
	if (isBinary) {
	return binaryLength;
	} else if (fieldsData instanceof byte[]) return ((byte[]) fieldsData).length;
	else return 0;
	}

	/**
	* Returns offset into byte[] segment that is used as value, if Field is not
	* binary returned value is undefined
	*
	* @return index of the first character in byte[] segment that represents this
	* Field value
	*/
	public int getBinaryOffset() {
	return binaryOffset;
	}

	public boolean isBinary() {
	return isBinary;
	}

	/** methods from inner FieldType */

	public boolean stored() {
	return type.stored();
	}

	public boolean indexed() {
	return type.indexed();
	}

	public boolean tokenized() {
	return type.tokenized();
	}

	public boolean omitNorms() {
	return type.omitNorms();
	}

	public boolean omitTermFreqAndPositions() {
	return type.omitTermFreqAndPositions();
	}

	public boolean storeTermVectors() {
	return type.storeTermVectors();
	}

	public boolean storeTermVectorOffsets() {
	return type.storeTermVectorOffsets();
	}

	public boolean storeTermVectorPositions() {
	return type.storeTermVectorPositions();
	}

	public boolean lazy() {
	return type.lazy();
	}

	/** Prints a Field for human consumption. */
	@Override
	public final String toString() {
	StringBuilder result = new StringBuilder();
	result.append(type.toString());
	result.append('<');
	result.append(name);
	result.append(':');

	if (fieldsData != null && type.lazy() == false) {
	result.append(fieldsData);
	}

	result.append('>');
	return result.toString();
	}
	}