docs/attachments/LUCENE-509/DocField.patch - lucene-jira-archive - Git at Google

 Index: src/test/org/apache/lucene/index/TestFieldsReader.java
 ===================================================================
 --- src/test/org/apache/lucene/index/TestFieldsReader.java	(revision 382415)
 +++ src/test/org/apache/lucene/index/TestFieldsReader.java	(working copy)
 @@ -69,7 +69,39 @@
      assertTrue(field.isStorePositionWithTermVector() == false);
      assertTrue(field.getOmitNorms() == true);

 -
      reader.close();
    }
 +
 +  /** Reads single fields of document 0 through docField and checks their text and flags. */
 +  public void testDocField() throws IOException {
 +    assertNotNull(dir);
 +    assertNotNull(fieldInfos);
 +    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
 +    assertNotNull(reader);
 +    assertEquals(1, reader.size());
 +
 +    Field first = reader.docField(0, "textField1");
 +    assertNotNull(first);
 +    assertEquals(DocHelper.FIELD_1_TEXT, first.stringValue());
 +
 +    Field second = reader.docField(0, "textField2");
 +    assertNotNull(second);
 +    assertTrue(second.isTermVectorStored());
 +    assertTrue(second.isStoreOffsetWithTermVector());
 +    assertTrue(second.isStorePositionWithTermVector());
 +    assertFalse(second.getOmitNorms());
 +    assertEquals(DocHelper.FIELD_2_TEXT, second.stringValue());
 +
 +    Field third = reader.docField(0, "textField3");
 +    assertNotNull(third);
 +    assertFalse(third.isTermVectorStored());
 +    assertFalse(third.isStoreOffsetWithTermVector());
 +    assertFalse(third.isStorePositionWithTermVector());
 +    assertTrue(third.getOmitNorms());
 +    assertEquals(DocHelper.FIELD_3_TEXT, third.stringValue());
 +
 +    // Asking for a field the document does not have must yield null.
 +    assertNull(reader.docField(0, "doesnotexist"));
 +    reader.close();
 +  }
  }
 Index: src/java/org/apache/lucene/index/FieldsReader.java
 ===================================================================
 --- src/java/org/apache/lucene/index/FieldsReader.java	(revision 382415)
 +++ src/java/org/apache/lucene/index/FieldsReader.java	(working copy)
 @@ -57,6 +57,11 @@
      return size;
    }

 +  /**
 +   * Retrieve a Document that contains all of the stored fields of document <tt>n</tt>.
 +   * @param n the document number
 +   * @return the Document holding every field stored for document <tt>n</tt>
 +   */
    final Document doc(int n) throws IOException {
      indexStream.seek(n * 8L);
      long position = indexStream.readLong();
 @@ -67,9 +72,22 @@
      for (int i = 0; i < numFields; i++) {
        int fieldNumber = fieldsStream.readVInt();
        FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
 +      doc.add(getFieldFromStream(fi));
 +    }

 +    return doc;
 +  }
 +
 +  /**
 +   * Retrieve a particular field from fieldsStream, which is currently
 +   * looking at the bits byte of a field.  When finished, the fieldsStream
 +   * will be looking at the fieldNum of the next stored field.
 +   * @param fi the FieldInfo for the field being examined (based on the
 +   * previous vint in the stream)
 +   * @return the <tt>Field</tt>.
 +   */
 +  private Field getFieldFromStream(FieldInfo fi) throws IOException {
        byte bits = fieldsStream.readByte();
 -
        boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
        boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

 @@ -77,9 +95,9 @@
          final byte[] b = new byte[fieldsStream.readVInt()];
          fieldsStream.readBytes(b, 0, b.length);
          if (compressed)
 -          doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
 +          return new Field(fi.name, uncompress(b), Field.Store.COMPRESS);
          else
 -          doc.add(new Field(fi.name, b, Field.Store.YES));
 +          return new Field(fi.name, b, Field.Store.YES);
        }
        else {
          Field.Index index;
 @@ -123,7 +141,7 @@
                index,
                termVector);
            f.setOmitNorms(fi.omitNorms);
 -          doc.add(f);
 +          return f;
          }
          else {
            Field f = new Field(fi.name,     // name
 @@ -132,13 +150,53 @@
                  index,
                  termVector);
            f.setOmitNorms(fi.omitNorms);
 -          doc.add(f);
 +          return f;
          }
        }
 -    }
 +  }

 -    return doc;
 +  /**
 +   * Retrieve the first field in the given document with the
 +   * specified field name.  If more than one field in the document
 +   * has that name, only the first one is returned.  Fields stored
 +   * ahead of the requested one are skipped without building them.
 +   * @param n the document to retrieve
 +   * @param fieldName the name of the field to retrieve
 +   * @return the first field in the document with that name, or <tt>null</tt>
 +   * if the document doesn't have such a field stored.
 +   * @see Document#getField(String)
 +   * @throws IOException if reading the stored-field streams fails
 +   */
 +  final Field docField(int n, String fieldName) throws IOException {
 +    int fieldNo = fieldInfos.fieldNumber(fieldName);
 +    if (fieldNo < 0) return null;  // fieldInfos has no such field name
 +
 +    indexStream.seek(n * 8L);  // one 8-byte pointer per document
 +    long position = indexStream.readLong();
 +    fieldsStream.seek(position);
 +    int numFields = fieldsStream.readVInt();
 +    // The length prefix counts bytes only for binary or compressed values.
 +    final int byteCounted = FieldsWriter.FIELD_IS_BINARY | FieldsWriter.FIELD_IS_COMPRESSED;
 +
 +    for (int i = 0; i < numFields; i++) {
 +      int fieldNumber = fieldsStream.readVInt();
 +      if (fieldNumber == fieldNo) {
 +        return getFieldFromStream(fieldInfos.fieldInfo(fieldNumber));
 +      }
 +      // Not the requested field: step over its value.
 +      byte bits = fieldsStream.readByte();
 +      if ((bits & byteCounted) != 0) {
 +        int dataLength = fieldsStream.readVInt();  // byte count
 +        fieldsStream.seek(fieldsStream.getFilePointer() + dataLength);
 +      } else {
 +        // Plain text is prefixed by a char count (1-3 bytes per char): decode it to skip it.
 +        fieldsStream.readString();
 +      }
 +    }
 +    // The field wasn't defined on the document, so ignore it.
 +    return null;
   }
 +

    private final byte[] uncompress(final byte[] input)
      throws IOException
 Index: src/java/org/apache/lucene/index/IndexReader.java
 ===================================================================
 --- src/java/org/apache/lucene/index/IndexReader.java	(revision 382415)
 +++ src/java/org/apache/lucene/index/IndexReader.java	(working copy)
 @@ -357,6 +357,20 @@
     <code>Document</code> in this index. */
    public abstract Document document(int n) throws IOException;

 +  /**
 +   * Returns the first field named <tt>field</tt> of document <tt>doc</tt>, like
 +   * {@link Document#getField(String)}.  This default implementation loads the
 +   * whole document and picks the field out of it; subclasses may override it
 +   * with something cheaper than calling document(doc).getField(field).
 +   *
 +   * Only use this method if the field can have only one value (like a document
 +   * id), it is the only field you want, and you want to reduce query overhead.
 +   */
 +  public Field getDocField(int doc, String field) throws IOException {
 +    final Document loaded = document(doc);
 +    return loaded.getField(field);
 +  }
 +
    /** Returns true if document <i>n</i> has been deleted */
    public abstract boolean isDeleted(int n);

 Index: src/java/org/apache/lucene/index/SegmentReader.java
 ===================================================================
 --- src/java/org/apache/lucene/index/SegmentReader.java	(revision 382415)
 +++ src/java/org/apache/lucene/index/SegmentReader.java	(working copy)
 @@ -284,6 +284,13 @@
      return fieldsReader.doc(n);
    }

 +  /** Fetches one stored field; synchronized because docField seeks and reads shared streams. */
 +  public synchronized Field getDocField(int doc, String fieldName) throws IOException {
 +    if (isDeleted(doc))
 +      throw new IllegalArgumentException("attempt to access a deleted document");
 +    return fieldsReader.docField(doc, fieldName);
 +  }
 +
    public synchronized boolean isDeleted(int n) {
      return (deletedDocs != null && deletedDocs.get(n));
    }
	Index: src/test/org/apache/lucene/index/TestFieldsReader.java
	===================================================================
	--- src/test/org/apache/lucene/index/TestFieldsReader.java (revision 382415)
	+++ src/test/org/apache/lucene/index/TestFieldsReader.java (working copy)
	@@ -69,7 +69,39 @@
	assertTrue(field.isStorePositionWithTermVector() == false);
	assertTrue(field.getOmitNorms() == true);

	-
	reader.close();
	}
	+
	+  /** Reads single fields of document 0 through docField and checks their text and flags. */
	+  public void testDocField() throws IOException {
	+    assertNotNull(dir);
	+    assertNotNull(fieldInfos);
	+    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
	+    assertNotNull(reader);
	+    assertEquals(1, reader.size());
	+
	+    Field first = reader.docField(0, "textField1");
	+    assertNotNull(first);
	+    assertEquals(DocHelper.FIELD_1_TEXT, first.stringValue());
	+
	+    Field second = reader.docField(0, "textField2");
	+    assertNotNull(second);
	+    assertTrue(second.isTermVectorStored());
	+    assertTrue(second.isStoreOffsetWithTermVector());
	+    assertTrue(second.isStorePositionWithTermVector());
	+    assertFalse(second.getOmitNorms());
	+    assertEquals(DocHelper.FIELD_2_TEXT, second.stringValue());
	+
	+    Field third = reader.docField(0, "textField3");
	+    assertNotNull(third);
	+    assertFalse(third.isTermVectorStored());
	+    assertFalse(third.isStoreOffsetWithTermVector());
	+    assertFalse(third.isStorePositionWithTermVector());
	+    assertTrue(third.getOmitNorms());
	+    assertEquals(DocHelper.FIELD_3_TEXT, third.stringValue());
	+
	+    // Asking for a field the document does not have must yield null.
	+    assertNull(reader.docField(0, "doesnotexist"));
	+    reader.close();
	+  }
	}
	Index: src/java/org/apache/lucene/index/FieldsReader.java
	===================================================================
	--- src/java/org/apache/lucene/index/FieldsReader.java (revision 382415)
	+++ src/java/org/apache/lucene/index/FieldsReader.java (working copy)
	@@ -57,6 +57,11 @@
	return size;
	}

	+ /**
	+ * Retrieve a Document that contains all of the stored fields of document <tt>n</tt>.
	+ * @param n the document number
	+ * @return the Document holding every field stored for document <tt>n</tt>
	+ */
	final Document doc(int n) throws IOException {
	indexStream.seek(n * 8L);
	long position = indexStream.readLong();
	@@ -67,9 +72,22 @@
	for (int i = 0; i < numFields; i++) {
	int fieldNumber = fieldsStream.readVInt();
	FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
	+ doc.add(getFieldFromStream(fi));
	+ }

	+ return doc;
	+ }
	+
	+ /**
	+ * Retrieve a particular field from fieldsStream, which is currently
	+ * looking at the bits byte of a field. When finished, the fieldsStream
	+ * will be looking at the fieldNum of the next stored field.
	+ * @param fi the FieldInfo for the field being examined (based on the
	+ * previous vint in the stream)
	+ * @return the <tt>Field</tt>.
	+ */
	+ private Field getFieldFromStream(FieldInfo fi) throws IOException {
	byte bits = fieldsStream.readByte();
	-
	boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
	boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

	@@ -77,9 +95,9 @@
	final byte[] b = new byte[fieldsStream.readVInt()];
	fieldsStream.readBytes(b, 0, b.length);
	if (compressed)
	- doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
	+ return new Field(fi.name, uncompress(b), Field.Store.COMPRESS);
	else
	- doc.add(new Field(fi.name, b, Field.Store.YES));
	+ return new Field(fi.name, b, Field.Store.YES);
	}
	else {
	Field.Index index;
	@@ -123,7 +141,7 @@
	index,
	termVector);
	f.setOmitNorms(fi.omitNorms);
	- doc.add(f);
	+ return f;
	}
	else {
	Field f = new Field(fi.name, // name
	@@ -132,13 +150,53 @@
	index,
	termVector);
	f.setOmitNorms(fi.omitNorms);
	- doc.add(f);
	+ return f;
	}
	}
	- }
	+ }

	- return doc;
	+  /**
	+   * Retrieve the first field in the given document with the
	+   * specified field name.  If more than one field in the document
	+   * has that name, only the first one is returned.  Fields stored
	+   * ahead of the requested one are skipped without building them.
	+   * @param n the document to retrieve
	+   * @param fieldName the name of the field to retrieve
	+   * @return the first field in the document with that name, or <tt>null</tt>
	+   * if the document doesn't have such a field stored.
	+   * @see Document#getField(String)
	+   * @throws IOException if reading the stored-field streams fails
	+   */
	+  final Field docField(int n, String fieldName) throws IOException {
	+    int fieldNo = fieldInfos.fieldNumber(fieldName);
	+    if (fieldNo < 0) return null;  // fieldInfos has no such field name
	+
	+    indexStream.seek(n * 8L);  // one 8-byte pointer per document
	+    long position = indexStream.readLong();
	+    fieldsStream.seek(position);
	+    int numFields = fieldsStream.readVInt();
	+    // The length prefix counts bytes only for binary or compressed values.
	+    final int byteCounted = FieldsWriter.FIELD_IS_BINARY | FieldsWriter.FIELD_IS_COMPRESSED;
	+
	+    for (int i = 0; i < numFields; i++) {
	+      int fieldNumber = fieldsStream.readVInt();
	+      if (fieldNumber == fieldNo) {
	+        return getFieldFromStream(fieldInfos.fieldInfo(fieldNumber));
	+      }
	+      // Not the requested field: step over its value.
	+      byte bits = fieldsStream.readByte();
	+      if ((bits & byteCounted) != 0) {
	+        int dataLength = fieldsStream.readVInt();  // byte count
	+        fieldsStream.seek(fieldsStream.getFilePointer() + dataLength);
	+      } else {
	+        // Plain text is prefixed by a char count (1-3 bytes per char): decode it to skip it.
	+        fieldsStream.readString();
	+      }
	+    }
	+    // The field wasn't defined on the document, so ignore it.
	+    return null;
	}
	+

	private final byte[] uncompress(final byte[] input)
	throws IOException
	Index: src/java/org/apache/lucene/index/IndexReader.java
	===================================================================
	--- src/java/org/apache/lucene/index/IndexReader.java (revision 382415)
	+++ src/java/org/apache/lucene/index/IndexReader.java (working copy)
	@@ -357,6 +357,20 @@
	<code>Document</code> in this index. */
	public abstract Document document(int n) throws IOException;

	+  /**
	+   * Returns the first field named <tt>field</tt> of document <tt>doc</tt>, like
	+   * {@link Document#getField(String)}.  This default implementation loads the
	+   * whole document and picks the field out of it; subclasses may override it
	+   * with something cheaper than calling document(doc).getField(field).
	+   *
	+   * Only use this method if the field can have only one value (like a document
	+   * id), it is the only field you want, and you want to reduce query overhead.
	+   */
	+  public Field getDocField(int doc, String field) throws IOException {
	+    final Document loaded = document(doc);
	+    return loaded.getField(field);
	+  }
	+
	/** Returns true if document <i>n</i> has been deleted */
	public abstract boolean isDeleted(int n);

	Index: src/java/org/apache/lucene/index/SegmentReader.java
	===================================================================
	--- src/java/org/apache/lucene/index/SegmentReader.java (revision 382415)
	+++ src/java/org/apache/lucene/index/SegmentReader.java (working copy)
	@@ -284,6 +284,13 @@
	return fieldsReader.doc(n);
	}

	+  /** Fetches one stored field; synchronized because docField seeks and reads shared streams. */
	+  public synchronized Field getDocField(int doc, String fieldName) throws IOException {
	+    if (isDeleted(doc))
	+      throw new IllegalArgumentException("attempt to access a deleted document");
	+    return fieldsReader.docField(doc, fieldName);
	+  }
	+
	public synchronized boolean isDeleted(int n) {
	return (deletedDocs != null && deletedDocs.get(n));
	}