docs/attachments/LUCENE-1699/LUCENE-1699.patch - lucene-jira-archive - Git at Google

 Index: CHANGES.txt
 ===================================================================
 --- CHANGES.txt	(revision 786867)
 +++ CHANGES.txt	(working copy)
 @@ -424,7 +424,12 @@

  28. LUCENE-1405: Added support for Ant resource collections in contrib/ant
      <index> task.  (Przemyslaw Sztoch via Erik Hatcher)
 +
 +29. LUCENE-1699: Allow setting a TokenStream on Field/Fieldable for indexing
 +    in conjunction with any other ways to specify stored field values,
 +    currently binary or string values.  (yonik)

 +
  Optimizations

   1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
 Index: src/java/org/apache/lucene/document/Fieldable.java
 ===================================================================
 --- src/java/org/apache/lucene/document/Fieldable.java	(revision 786867)
 +++ src/java/org/apache/lucene/document/Fieldable.java	(working copy)
 @@ -74,36 +74,41 @@
     */
    String name();

 -  /** The value of the field as a String, or null.  If null, the Reader value,
 -   * binary value, or TokenStream value is used.  Exactly one of stringValue(),
 -   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
 +  /** The value of the field as a String, or null.
 +   * <p>
 +   * For indexing, if isStored()==true, the stringValue() will be used as the stored field value
 +   * unless isBinary()==true, in which case binaryValue() will be used.
 +   *
 +   * If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token.
 +   * If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null,
 +   * else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens.
 +   */
    public String stringValue();

 -  /** The value of the field as a Reader, or null.  If null, the String value,
 -   * binary value, or TokenStream value is used.  Exactly one of stringValue(),
 -   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
 +  /** The value of the field as a Reader, which can be used at index time to generate indexed tokens.
 +   * @see #stringValue()
 +   */
    public Reader readerValue();

 -  /** The value of the field in Binary, or null.  If null, the Reader value,
 -   * String value, or TokenStream value is used. Exactly one of stringValue(),
 -   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
 +  /** The value of the field in Binary, or null.
 +   * @see #stringValue()
 +   */
    public byte[] binaryValue();

 -  /** The value of the field as a TokenStream, or null.  If null, the Reader value,
 -   * String value, or binary value is used. Exactly one of stringValue(),
 -   * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
 +  /** The TokenStream for this field to be used when indexing, or null.
 +   * @see #stringValue()
 +   */
    public TokenStream tokenStreamValue();

 -  /** True iff the value of the field is to be stored in the index for return
 -    with search hits.  It is an error for this to be true if a field is
 -    Reader-valued. */
 +  /** True if the value of the field is to be stored in the index for return
 +    with search hits. */
    boolean  isStored();

 -  /** True iff the value of the field is to be indexed, so that it may be
 +  /** True if the value of the field is to be indexed, so that it may be
      searched on. */
    boolean  isIndexed();

 -  /** True iff the value of the field should be tokenized as text prior to
 +  /** True if the value of the field should be tokenized as text prior to
      indexing.  Un-tokenized fields are indexed as a single word and may not be
      Reader-valued. */
    boolean  isTokenized();
 @@ -111,7 +116,7 @@
    /** True if the value of the field is stored and compressed within the index */
    boolean  isCompressed();

 -  /** True iff the term or terms used to index this field are stored as a term
 +  /** True if the term or terms used to index this field are stored as a term
     *  vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
     *  These methods do not provide access to the original content of the field,
     *  only to terms used to index it. If the original content must be
 @@ -122,17 +127,17 @@
    boolean isTermVectorStored();

    /**
 -   * True iff terms are stored as term vector together with their offsets
 +   * True if terms are stored as term vector together with their offsets
     * (start and end positon in source text).
     */
    boolean isStoreOffsetWithTermVector();

    /**
 -   * True iff terms are stored as term vector together with their token positions.
 +   * True if terms are stored as term vector together with their token positions.
     */
    boolean isStorePositionWithTermVector();

 -  /** True iff the value of the filed is stored as binary */
 +  /** True if the value of the field is stored as binary */
    boolean  isBinary();

    /** True if norms are omitted for this indexed field */
 Index: src/java/org/apache/lucene/document/AbstractField.java
 ===================================================================
 --- src/java/org/apache/lucene/document/AbstractField.java	(revision 786867)
 +++ src/java/org/apache/lucene/document/AbstractField.java	(working copy)
 @@ -16,7 +16,8 @@
   */

  import org.apache.lucene.search.PhraseQuery; // for javadocs
 -import org.apache.lucene.search.spans.SpanQuery; // for javadocs
 +import org.apache.lucene.search.spans.SpanQuery; // for javadocs
 +import org.apache.lucene.analysis.TokenStream;


  /**
 @@ -38,9 +39,11 @@
    protected boolean lazy = false;
    protected boolean omitTermFreqAndPositions = false;
    protected float boost = 1.0f;
 -  // the one and only data object for all different kind of field values
 +  // the data object for all different kind of field values
    protected Object fieldsData = null;
 -  //length/offset for all primitive types
 +  // pre-analyzed tokenStream for indexed fields
 +  protected TokenStream tokenStream;
 +  // length/offset for all primitive types
    protected int binaryLength;
    protected int binaryOffset;

 Index: src/java/org/apache/lucene/document/Field.java
 ===================================================================
 --- src/java/org/apache/lucene/document/Field.java	(revision 786867)
 +++ src/java/org/apache/lucene/document/Field.java	(working copy)
 @@ -94,7 +94,7 @@
      /** Expert: Index the field's value without an Analyzer,
       * and also disable the storing of norms.  Note that you
       * can also separately enable/disable norms by calling
 -     * {@link #setOmitNorms}.  No norms means that
 +     * {@link Field#setOmitNorms}.  No norms means that
       * index-time field and document boosting and field
       * length normalization are disabled.  The benefit is
       * less memory usage as norms take up one byte of RAM
 @@ -159,19 +159,19 @@
    }


 -  /** The value of the field as a String, or null.  If null, the Reader value,
 -   * binary value, or TokenStream value is used.  Exactly one of stringValue(),
 -   * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
 +  /** The value of the field as a String, or null.  If null, the Reader value or
 +   * binary value is used.  Exactly one of stringValue(),
 +   * readerValue(), and getBinaryValue() must be set. */
    public String stringValue()   { return fieldsData instanceof String ? (String)fieldsData : null; }

 -  /** The value of the field as a Reader, or null.  If null, the String value,
 -   * binary value, or TokenStream value is used.  Exactly one of stringValue(),
 -   * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
 +  /** The value of the field as a Reader, or null.  If null, the String value or
 +   * binary value is used.  Exactly one of stringValue(),
 +   * readerValue(), and getBinaryValue() must be set. */
    public Reader readerValue()   { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }

    /** The value of the field in Binary, or null.  If null, the Reader value,
 -   * String value, or TokenStream value is used. Exactly one of stringValue(),
 -   * readerValue(), getBinaryValue(), and tokenStreamValue() must be set.
 +   * or String value is used. Exactly one of stringValue(),
 +   * readerValue(), and getBinaryValue() must be set.
     * @deprecated This method must allocate a new byte[] if
     * the {@link AbstractField#getBinaryOffset()} is non-zero
     * or {@link AbstractField#getBinaryLength()} is not the
 @@ -191,10 +191,9 @@
      return ret;
    }

 -  /** The value of the field as a TokesStream, or null.  If null, the Reader value,
 -   * String value, or binary value is used. Exactly one of stringValue(),
 -   * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
 -  public TokenStream tokenStreamValue()   { return fieldsData instanceof TokenStream ? (TokenStream)fieldsData : null; }
 +  /** The TokenStream for this field to be used when indexing, or null.  If null, the Reader value
 +   * or String value is analyzed to produce the indexed tokens. */
 +  public TokenStream tokenStreamValue()   { return tokenStream; }


    /** <p>Expert: change the value of this field.  This can
 @@ -204,10 +203,7 @@
     *  a single {@link Document} instance is re-used as
     *  well.  This helps most on small documents.</p>
     *
 -   *  <p>Note that you should only use this method after the
 -   *  Field has been consumed (ie, the {@link Document}
 -   *  containing this Field has been added to the index).
 -   *  Also, each Field instance should only be used once
 +   *  <p>Each Field instance should only be used once
     *  within a single {@link Document} instance.  See <a
     *  href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed">ImproveIndexingSpeed</a>
     *  for details.</p> */
 @@ -250,7 +246,8 @@
    }


 -  /** Expert: change the value of this field.  See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
 +  /** Expert: change the value of this field.  See <a href="#setValue(java.lang.String)">setValue(String)</a>.
 +   * @deprecated use {@link #setTokenStream} */
    public void setValue(TokenStream value) {
      if (isBinary) {
        throw new IllegalArgumentException("cannot set a TokenStream value on a binary field");
 @@ -258,9 +255,18 @@
      if (isStored) {
        throw new IllegalArgumentException("cannot set a TokenStream value on a stored field");
      }
 -    fieldsData = value;
 +    fieldsData = null;
 +    tokenStream = value;
    }

 +  /** Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
 +   *  May be combined with stored values from stringValue() or binaryValue(). */
 +  public void setTokenStream(TokenStream tokenStream) {
 +    this.isIndexed = true;    // supplying a token stream turns indexing on for this field
 +    this.isTokenized = true;  // and marks it as tokenized
 +    this.tokenStream = tokenStream;  // stored as-is; null is not rejected here
 +  }
 +
    /**
     * Create a field by specifying its name, value and how it will
     * be saved in the index. Term vectors will not be stored in the index.
 @@ -459,8 +465,9 @@
        throw new NullPointerException("tokenStream cannot be null");

      this.name = name.intern();        // field names are interned
 -    this.fieldsData = tokenStream;
 -
 +    this.fieldsData = null;
 +    this.tokenStream = tokenStream;
 +
      this.isStored = false;
      this.isCompressed = false;

 Index: src/test/org/apache/lucene/index/TestIndexWriter.java
 ===================================================================
 --- src/test/org/apache/lucene/index/TestIndexWriter.java	(revision 786867)
 +++ src/test/org/apache/lucene/index/TestIndexWriter.java	(working copy)
 @@ -17,11 +17,7 @@
   * limitations under the License.
   */

 -import java.io.ByteArrayOutputStream;
 -import java.io.File;
 -import java.io.IOException;
 -import java.io.PrintStream;
 -import java.io.Reader;
 +import java.io.*;
  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.List;
 @@ -4350,4 +4346,66 @@
      t.join();
      assertFalse(t.failed);
    }
 +
 +  /** LUCENE-1699: a TokenStream set via setTokenStream() is indexed alongside a stored binary or string value. */
 +  public void testIndexStoreCombos() throws Exception {
 +    MockRAMDirectory dir = new MockRAMDirectory();
 +    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
 +    byte[] b = new byte[50];
 +    for(int i=0;i<50;i++)
 +      b[i] = (byte) (i+77);
 +    // The binary field below is built from (b, 10, 17); the assertions expect 17 stored bytes, the first being 10+77 == 87.
 +    Document doc = new Document();
 +    Field f = new Field("binary", b, 10, 17, Field.Store.YES);
 +    f.setTokenStream(new WhitespaceTokenizer(new StringReader("doc1field1")));
 +    Field f2 = new Field("string", "value", Field.Store.YES,Field.Index.ANALYZED);
 +    f2.setTokenStream(new WhitespaceTokenizer(new StringReader("doc1field2")));
 +    doc.add(f);
 +    doc.add(f2);
 +    w.addDocument(doc);
 +
 +    // add 2 docs to test in-memory merging
 +    f.setTokenStream(new WhitespaceTokenizer(new StringReader("doc2field1")));
 +    f2.setTokenStream(new WhitespaceTokenizer(new StringReader("doc2field2")));
 +    w.addDocument(doc);
 +
 +    // force segment flush so we can force a segment merge with doc3 later.
 +    w.commit();
 +
 +    f.setTokenStream(new WhitespaceTokenizer(new StringReader("doc3field1")));
 +    f2.setTokenStream(new WhitespaceTokenizer(new StringReader("doc3field2")));
 +
 +    w.addDocument(doc);
 +    w.commit();
 +    w.optimize();   // force segment merge.
 +    w.close();      // the writer was never closed; close it before reading the index back
 +    IndexReader ir = IndexReader.open(dir);
 +    doc = ir.document(0);
 +    f = doc.getField("binary");
 +    b = f.getBinaryValue();
 +    assertNotNull(b);
 +    assertEquals(17, b.length);   // exact check; the old 3-arg form used a delta of 17
 +    assertEquals(87, b[0]);
 +
 +    assertTrue(ir.document(0).getFieldable("binary").isBinary());
 +    assertTrue(ir.document(1).getFieldable("binary").isBinary());
 +    assertTrue(ir.document(2).getFieldable("binary").isBinary());
 +
 +    assertEquals("value", ir.document(0).get("string"));
 +    assertEquals("value", ir.document(1).get("string"));
 +    assertEquals("value", ir.document(2).get("string"));
 +
 +
 +    // test that the terms were indexed.
 +    assertTrue(ir.termDocs(new Term("binary","doc1field1")).next());
 +    assertTrue(ir.termDocs(new Term("binary","doc2field1")).next());
 +    assertTrue(ir.termDocs(new Term("binary","doc3field1")).next());
 +    assertTrue(ir.termDocs(new Term("string","doc1field2")).next());
 +    assertTrue(ir.termDocs(new Term("string","doc2field2")).next());
 +    assertTrue(ir.termDocs(new Term("string","doc3field2")).next());
 +
 +    ir.close();
 +    dir.close();
 +
 +  }
  }
	Index: CHANGES.txt
	===================================================================
	--- CHANGES.txt (revision 786867)
	+++ CHANGES.txt (working copy)
	@@ -424,7 +424,12 @@

	28. LUCENE-1405: Added support for Ant resource collections in contrib/ant
	<index> task. (Przemyslaw Sztoch via Erik Hatcher)
	+
	+29. LUCENE-1699: Allow setting a TokenStream on Field/Fieldable for indexing
	+ in conjunction with any other ways to specify stored field values,
	+ currently binary or string values. (yonik)

	+
	Optimizations

	1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
	Index: src/java/org/apache/lucene/document/Fieldable.java
	===================================================================
	--- src/java/org/apache/lucene/document/Fieldable.java (revision 786867)
	+++ src/java/org/apache/lucene/document/Fieldable.java (working copy)
	@@ -74,36 +74,41 @@
	*/
	String name();

	- /** The value of the field as a String, or null. If null, the Reader value,
	- * binary value, or TokenStream value is used. Exactly one of stringValue(),
	- * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
	+ /** The value of the field as a String, or null.
	+ * <p>
	+ * For indexing, if isStored()==true, the stringValue() will be used as the stored field value
	+ * unless isBinary()==true, in which case binaryValue() will be used.
	+ *
	+ * If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token.
	+ * If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null,
	+ * else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens.
	+ */
	public String stringValue();

	- /** The value of the field as a Reader, or null. If null, the String value,
	- * binary value, or TokenStream value is used. Exactly one of stringValue(),
	- * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
	+ /** The value of the field as a Reader, which can be used at index time to generate indexed tokens.
	+ * @see #stringValue()
	+ */
	public Reader readerValue();

	- /** The value of the field in Binary, or null. If null, the Reader value,
	- * String value, or TokenStream value is used. Exactly one of stringValue(),
	- * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
	+ /** The value of the field in Binary, or null.
	+ * @see #stringValue()
	+ */
	public byte[] binaryValue();

	- /** The value of the field as a TokenStream, or null. If null, the Reader value,
	- * String value, or binary value is used. Exactly one of stringValue(),
	- * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
	+ /** The TokenStream for this field to be used when indexing, or null.
	+ * @see #stringValue()
	+ */
	public TokenStream tokenStreamValue();

	- /** True iff the value of the field is to be stored in the index for return
	- with search hits. It is an error for this to be true if a field is
	- Reader-valued. */
	+ /** True if the value of the field is to be stored in the index for return
	+ with search hits. */
	boolean isStored();

	- /** True iff the value of the field is to be indexed, so that it may be
	+ /** True if the value of the field is to be indexed, so that it may be
	searched on. */
	boolean isIndexed();

	- /** True iff the value of the field should be tokenized as text prior to
	+ /** True if the value of the field should be tokenized as text prior to
	indexing. Un-tokenized fields are indexed as a single word and may not be
	Reader-valued. */
	boolean isTokenized();
	@@ -111,7 +116,7 @@
	/** True if the value of the field is stored and compressed within the index */
	boolean isCompressed();

	- /** True iff the term or terms used to index this field are stored as a term
	+ /** True if the term or terms used to index this field are stored as a term
	* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
	* These methods do not provide access to the original content of the field,
	* only to terms used to index it. If the original content must be
	@@ -122,17 +127,17 @@
	boolean isTermVectorStored();

	/**
	- * True iff terms are stored as term vector together with their offsets
	+ * True if terms are stored as term vector together with their offsets
	* (start and end positon in source text).
	*/
	boolean isStoreOffsetWithTermVector();

	/**
	- * True iff terms are stored as term vector together with their token positions.
	+ * True if terms are stored as term vector together with their token positions.
	*/
	boolean isStorePositionWithTermVector();

	- /** True iff the value of the filed is stored as binary */
	+ /** True if the value of the field is stored as binary */
	boolean isBinary();

	/** True if norms are omitted for this indexed field */
	Index: src/java/org/apache/lucene/document/AbstractField.java
	===================================================================
	--- src/java/org/apache/lucene/document/AbstractField.java (revision 786867)
	+++ src/java/org/apache/lucene/document/AbstractField.java (working copy)
	@@ -16,7 +16,8 @@
	*/

	import org.apache.lucene.search.PhraseQuery; // for javadocs
	-import org.apache.lucene.search.spans.SpanQuery; // for javadocs
	+import org.apache.lucene.search.spans.SpanQuery; // for javadocs
	+import org.apache.lucene.analysis.TokenStream;


	/**
	@@ -38,9 +39,11 @@
	protected boolean lazy = false;
	protected boolean omitTermFreqAndPositions = false;
	protected float boost = 1.0f;
	- // the one and only data object for all different kind of field values
	+ // the data object for all different kind of field values
	protected Object fieldsData = null;
	- //length/offset for all primitive types
	+ // pre-analyzed tokenStream for indexed fields
	+ protected TokenStream tokenStream;
	+ // length/offset for all primitive types
	protected int binaryLength;
	protected int binaryOffset;

	Index: src/java/org/apache/lucene/document/Field.java
	===================================================================
	--- src/java/org/apache/lucene/document/Field.java (revision 786867)
	+++ src/java/org/apache/lucene/document/Field.java (working copy)
	@@ -94,7 +94,7 @@
	/** Expert: Index the field's value without an Analyzer,
	* and also disable the storing of norms. Note that you
	* can also separately enable/disable norms by calling
	- * {@link #setOmitNorms}. No norms means that
	+ * {@link Field#setOmitNorms}. No norms means that
	* index-time field and document boosting and field
	* length normalization are disabled. The benefit is
	* less memory usage as norms take up one byte of RAM
	@@ -159,19 +159,19 @@
	}


	- /** The value of the field as a String, or null. If null, the Reader value,
	- * binary value, or TokenStream value is used. Exactly one of stringValue(),
	- * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
	+ /** The value of the field as a String, or null. If null, the Reader value or
	+ * binary value is used. Exactly one of stringValue(),
	+ * readerValue(), and getBinaryValue() must be set. */
	public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }

	- /** The value of the field as a Reader, or null. If null, the String value,
	- * binary value, or TokenStream value is used. Exactly one of stringValue(),
	- * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
	+ /** The value of the field as a Reader, or null. If null, the String value or
	+ * binary value is used. Exactly one of stringValue(),
	+ * readerValue(), and getBinaryValue() must be set. */
	public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }

	/** The value of the field in Binary, or null. If null, the Reader value,
	- * String value, or TokenStream value is used. Exactly one of stringValue(),
	- * readerValue(), getBinaryValue(), and tokenStreamValue() must be set.
	+ * or String value is used. Exactly one of stringValue(),
	+ * readerValue(), and getBinaryValue() must be set.
	* @deprecated This method must allocate a new byte[] if
	* the {@link AbstractField#getBinaryOffset()} is non-zero
	* or {@link AbstractField#getBinaryLength()} is not the
	@@ -191,10 +191,9 @@
	return ret;
	}

	- /** The value of the field as a TokesStream, or null. If null, the Reader value,
	- * String value, or binary value is used. Exactly one of stringValue(),
	- * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
	- public TokenStream tokenStreamValue() { return fieldsData instanceof TokenStream ? (TokenStream)fieldsData : null; }
	+ /** The TokenStream for this field to be used when indexing, or null. If null, the Reader value
	+ * or String value is analyzed to produce the indexed tokens. */
	+ public TokenStream tokenStreamValue() { return tokenStream; }


	/** <p>Expert: change the value of this field. This can
	@@ -204,10 +203,7 @@
	* a single {@link Document} instance is re-used as
	* well. This helps most on small documents.</p>
	*
	- * <p>Note that you should only use this method after the
	- * Field has been consumed (ie, the {@link Document}
	- * containing this Field has been added to the index).
	- * Also, each Field instance should only be used once
	+ * <p>Each Field instance should only be used once
	* within a single {@link Document} instance. See <a
	* href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed">ImproveIndexingSpeed</a>
	* for details.</p> */
	@@ -250,7 +246,8 @@
	}


	- /** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
	+ /** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>.
	+ * @deprecated use {@link #setTokenStream} */
	public void setValue(TokenStream value) {
	if (isBinary) {
	throw new IllegalArgumentException("cannot set a TokenStream value on a binary field");
	@@ -258,9 +255,18 @@
	if (isStored) {
	throw new IllegalArgumentException("cannot set a TokenStream value on a stored field");
	}
	- fieldsData = value;
	+ fieldsData = null;
	+ tokenStream = value;
	}

	+ /** Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
	+ * May be combined with stored values from stringValue() or binaryValue(). */
	+ public void setTokenStream(TokenStream tokenStream) {
	+ this.isIndexed = true; // supplying a token stream turns indexing on for this field
	+ this.isTokenized = true; // and marks it as tokenized
	+ this.tokenStream = tokenStream; // stored as-is; null is not rejected here
	+ }
	+
	/**
	* Create a field by specifying its name, value and how it will
	* be saved in the index. Term vectors will not be stored in the index.
	@@ -459,8 +465,9 @@
	throw new NullPointerException("tokenStream cannot be null");

	this.name = name.intern(); // field names are interned
	- this.fieldsData = tokenStream;
	-
	+ this.fieldsData = null;
	+ this.tokenStream = tokenStream;
	+
	this.isStored = false;
	this.isCompressed = false;

	Index: src/test/org/apache/lucene/index/TestIndexWriter.java
	===================================================================
	--- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 786867)
	+++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy)
	@@ -17,11 +17,7 @@
	* limitations under the License.
	*/

	-import java.io.ByteArrayOutputStream;
	-import java.io.File;
	-import java.io.IOException;
	-import java.io.PrintStream;
	-import java.io.Reader;
	+import java.io.*;
	import java.util.ArrayList;
	import java.util.Arrays;
	import java.util.List;
	@@ -4350,4 +4346,66 @@
	t.join();
	assertFalse(t.failed);
	}
	+
	+ /** LUCENE-1699: a TokenStream set via setTokenStream() is indexed alongside a stored binary or string value. */
	+ public void testIndexStoreCombos() throws Exception {
	+ MockRAMDirectory dir = new MockRAMDirectory();
	+ IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
	+ byte[] b = new byte[50];
	+ for(int i=0;i<50;i++)
	+ b[i] = (byte) (i+77);
	+ // The binary field below is built from (b, 10, 17); the assertions expect 17 stored bytes, the first being 10+77 == 87.
	+ Document doc = new Document();
	+ Field f = new Field("binary", b, 10, 17, Field.Store.YES);
	+ f.setTokenStream(new WhitespaceTokenizer(new StringReader("doc1field1")));
	+ Field f2 = new Field("string", "value", Field.Store.YES,Field.Index.ANALYZED);
	+ f2.setTokenStream(new WhitespaceTokenizer(new StringReader("doc1field2")));
	+ doc.add(f);
	+ doc.add(f2);
	+ w.addDocument(doc);
	+
	+ // add 2 docs to test in-memory merging
	+ f.setTokenStream(new WhitespaceTokenizer(new StringReader("doc2field1")));
	+ f2.setTokenStream(new WhitespaceTokenizer(new StringReader("doc2field2")));
	+ w.addDocument(doc);
	+
	+ // force segment flush so we can force a segment merge with doc3 later.
	+ w.commit();
	+
	+ f.setTokenStream(new WhitespaceTokenizer(new StringReader("doc3field1")));
	+ f2.setTokenStream(new WhitespaceTokenizer(new StringReader("doc3field2")));
	+
	+ w.addDocument(doc);
	+ w.commit();
	+ w.optimize(); // force segment merge.
	+ w.close(); // the writer was never closed; close it before reading the index back
	+ IndexReader ir = IndexReader.open(dir);
	+ doc = ir.document(0);
	+ f = doc.getField("binary");
	+ b = f.getBinaryValue();
	+ assertNotNull(b);
	+ assertEquals(17, b.length); // exact check; the old 3-arg form used a delta of 17
	+ assertEquals(87, b[0]);
	+
	+ assertTrue(ir.document(0).getFieldable("binary").isBinary());
	+ assertTrue(ir.document(1).getFieldable("binary").isBinary());
	+ assertTrue(ir.document(2).getFieldable("binary").isBinary());
	+
	+ assertEquals("value", ir.document(0).get("string"));
	+ assertEquals("value", ir.document(1).get("string"));
	+ assertEquals("value", ir.document(2).get("string"));
	+
	+
	+ // test that the terms were indexed.
	+ assertTrue(ir.termDocs(new Term("binary","doc1field1")).next());
	+ assertTrue(ir.termDocs(new Term("binary","doc2field1")).next());
	+ assertTrue(ir.termDocs(new Term("binary","doc3field1")).next());
	+ assertTrue(ir.termDocs(new Term("string","doc1field2")).next());
	+ assertTrue(ir.termDocs(new Term("string","doc2field2")).next());
	+ assertTrue(ir.termDocs(new Term("string","doc3field2")).next());
	+
	+ ir.close();
	+ dir.close();
	+
	+ }
	}