Index: src/test/org/apache/lucene/store/IndexInputTest.java
===================================================================
--- src/test/org/apache/lucene/store/IndexInputTest.java (revision 0)
+++ src/test/org/apache/lucene/store/IndexInputTest.java (revision 0)
@@ -0,0 +1,104 @@
+package org.apache.lucene.store;
+
+import junit.framework.TestCase;
+
+public class IndexInputTest extends TestCase {
+
+ public void testInt() throws Exception {
+ genericTestInt(0);
+ genericTestInt(1);
+ genericTestInt(-1);
+ genericTestInt(Integer.MAX_VALUE);
+ genericTestInt(Integer.MIN_VALUE);
+ }
+
+ public void testVInt() throws Exception {
+ genericTestVInt(0);
+ genericTestVInt(1);
+ genericTestVInt(-1);
+ genericTestVInt(Integer.MAX_VALUE);
+ genericTestVInt(Integer.MIN_VALUE);
+ }
+
+ public void testLong() throws Exception {
+ genericTestLong(0);
+ genericTestLong(1);
+ genericTestLong(-1);
+ genericTestLong(Long.MAX_VALUE);
+ genericTestLong(Long.MIN_VALUE);
+ }
+
+ public void testVLong() throws Exception {
+ genericTestVLong(0);
+ genericTestVLong(1);
+ genericTestVLong(-1);
+ genericTestVLong(Long.MAX_VALUE);
+ genericTestVLong(Long.MIN_VALUE);
+ }
+
+ public void testString() throws Exception {
+ genericTestString("");
+ genericTestString("a");
+ genericTestString("GiyNNKHhnivNKKHgcNiCniCH716534912é_è'-(é(_çà-é$*ù!:;,!:;,");
+ }
+
+ private void genericTestInt(int i) throws Exception {
+ RAMFile fileA = new RAMFile();
+ RAMFile fileB = new RAMFile();
+ RAMOutputStream outA = new RAMOutputStream(fileA);
+ outA.writeInt(i);
+ outA.close();
+ RAMOutputStream outB = new RAMOutputStream(fileB);
+ outB.writeInt(new RAMInputStream(fileA));
+ outB.close();
+ assertEquals(i, new RAMInputStream(fileB).readInt());
+ }
+
+ private void genericTestVInt(int i) throws Exception {
+ RAMFile fileA = new RAMFile();
+ RAMFile fileB = new RAMFile();
+ RAMOutputStream outA = new RAMOutputStream(fileA);
+ outA.writeVInt(i);
+ outA.close();
+ RAMOutputStream outB = new RAMOutputStream(fileB);
+ outB.writeVInt(new RAMInputStream(fileA));
+ outB.close();
+ assertEquals(i, new RAMInputStream(fileB).readVInt());
+ }
+
+ private void genericTestLong(long l) throws Exception {
+ RAMFile fileA = new RAMFile();
+ RAMFile fileB = new RAMFile();
+ RAMOutputStream outA = new RAMOutputStream(fileA);
+ outA.writeLong(l);
+ outA.close();
+ RAMOutputStream outB = new RAMOutputStream(fileB);
+ outB.writeLong(new RAMInputStream(fileA));
+ outB.close();
+ assertEquals(l, new RAMInputStream(fileB).readLong());
+ }
+
+ private void genericTestVLong(long l) throws Exception {
+ RAMFile fileA = new RAMFile();
+ RAMFile fileB = new RAMFile();
+ RAMOutputStream outA = new RAMOutputStream(fileA);
+ outA.writeVLong(l);
+ outA.close();
+ RAMOutputStream outB = new RAMOutputStream(fileB);
+ outB.writeVLong(new RAMInputStream(fileA));
+ outB.close();
+ assertEquals(l, new RAMInputStream(fileB).readVLong());
+ }
+
+ private void genericTestString(String s) throws Exception {
+ RAMFile fileA = new RAMFile();
+ RAMFile fileB = new RAMFile();
+ RAMOutputStream outA = new RAMOutputStream(fileA);
+ outA.writeString(s);
+ outA.close();
+ RAMOutputStream outB = new RAMOutputStream(fileB);
+ outB.writeString(new RAMInputStream(fileA));
+ outB.close();
+ assertEquals(s, new RAMInputStream(fileB).readString());
+ }
+}
Index: src/test/org/apache/lucene/index/TestSegmentTermDocs.java
===================================================================
--- src/test/org/apache/lucene/index/TestSegmentTermDocs.java (revision 433317)
+++ src/test/org/apache/lucene/index/TestSegmentTermDocs.java (working copy)
@@ -49,7 +49,7 @@
public void testTermDocs() throws IOException {
//After adding the document, we should be able to read it back in
- SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 1, dir));
+ SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 1, dir), new DefaultIndexFormat());
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@@ -67,7 +67,7 @@
public void testBadSeek() throws IOException {
{
//After adding the document, we should be able to read it back in
- SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 3, dir));
+ SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 3, dir), new DefaultIndexFormat());
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@@ -77,7 +77,7 @@
}
{
//After adding the document, we should be able to read it back in
- SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 3, dir));
+ SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 3, dir), new DefaultIndexFormat());
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
Index: src/test/org/apache/lucene/index/TestSegmentMerger.java
===================================================================
--- src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 433317)
+++ src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy)
@@ -49,8 +49,8 @@
DocHelper.writeDoc(merge1Dir, merge1Segment, doc1);
DocHelper.setupDoc(doc2);
DocHelper.writeDoc(merge2Dir, merge2Segment, doc2);
- reader1 = SegmentReader.get(new SegmentInfo(merge1Segment, 1, merge1Dir));
- reader2 = SegmentReader.get(new SegmentInfo(merge2Segment, 1, merge2Dir));
+ reader1 = SegmentReader.get(new SegmentInfo(merge1Segment, 1, merge1Dir), new DefaultIndexFormat());
+ reader2 = SegmentReader.get(new SegmentInfo(merge2Segment, 1, merge2Dir), new DefaultIndexFormat());
}
public void test() {
@@ -69,7 +69,7 @@
merger.closeReaders();
assertTrue(docsMerged == 2);
//Should be able to open a new SegmentReader against the new directory
- SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir));
+ SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir), new DefaultIndexFormat());
assertTrue(mergedReader != null);
assertTrue(mergedReader.numDocs() == 2);
Document newDoc1 = mergedReader.document(0);
Index: src/test/org/apache/lucene/index/DocHelper.java
===================================================================
--- src/test/org/apache/lucene/index/DocHelper.java (revision 433317)
+++ src/test/org/apache/lucene/index/DocHelper.java (working copy)
@@ -253,7 +253,7 @@
*/
public static void writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, String segment, Document doc) throws IOException
{
- DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
+ DocumentWriter writer = new DocumentWriter(dir, new DefaultIndexFormat(), analyzer, similarity, 50);
writer.addDocument(segment, doc);
}
Index: src/test/org/apache/lucene/index/TestDoc.java
===================================================================
--- src/test/org/apache/lucene/index/TestDoc.java (revision 433317)
+++ src/test/org/apache/lucene/index/TestDoc.java (working copy)
@@ -162,7 +162,7 @@
Directory directory = FSDirectory.getDirectory(indexDir, false);
Analyzer analyzer = new SimpleAnalyzer();
DocumentWriter writer =
- new DocumentWriter(directory, analyzer, Similarity.getDefault(), 1000);
+ new DocumentWriter(directory, new DefaultIndexFormat(), analyzer, Similarity.getDefault(), 1000);
File file = new File(workDir, fileName);
Document doc = FileDocument.Document(file);
@@ -177,8 +177,8 @@
throws Exception {
Directory directory = FSDirectory.getDirectory(indexDir, false);
- SegmentReader r1 = SegmentReader.get(new SegmentInfo(seg1, 1, directory));
- SegmentReader r2 = SegmentReader.get(new SegmentInfo(seg2, 1, directory));
+ SegmentReader r1 = SegmentReader.get(new SegmentInfo(seg1, 1, directory), new DefaultIndexFormat());
+ SegmentReader r2 = SegmentReader.get(new SegmentInfo(seg2, 1, directory), new DefaultIndexFormat());
SegmentMerger merger =
new SegmentMerger(directory, merged);
@@ -202,7 +202,7 @@
throws Exception {
Directory directory = FSDirectory.getDirectory(indexDir, false);
SegmentReader reader =
- SegmentReader.get(new SegmentInfo(segment, 1, directory));
+ SegmentReader.get(new SegmentInfo(segment, 1, directory), new DefaultIndexFormat());
for (int i = 0; i < reader.numDocs(); i++)
out.println(reader.document(i));
Index: src/test/org/apache/lucene/index/TestDocumentWriter.java
===================================================================
--- src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 433317)
+++ src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy)
@@ -53,11 +53,11 @@
DocHelper.setupDoc(testDoc);
Analyzer analyzer = new WhitespaceAnalyzer();
Similarity similarity = Similarity.getDefault();
- DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
+ DocumentWriter writer = new DocumentWriter(dir, new DefaultIndexFormat(), analyzer, similarity, 50);
String segName = "test";
writer.addDocument(segName, testDoc);
//After adding the document, we should be able to read it back in
- SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
+ SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir), new DefaultIndexFormat());
assertTrue(reader != null);
Document doc = reader.document(0);
assertTrue(doc != null);
@@ -107,14 +107,14 @@
};
Similarity similarity = Similarity.getDefault();
- DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
+ DocumentWriter writer = new DocumentWriter(dir, new DefaultIndexFormat(), analyzer, similarity, 50);
Document doc = new Document();
doc.add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.TOKENIZED));
String segName = "test";
writer.addDocument(segName, doc);
- SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
+ SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir), new DefaultIndexFormat());
TermPositions termPositions = reader.termPositions(new Term("repeated", "repeated"));
assertTrue(termPositions.next());
Index: src/test/org/apache/lucene/index/TestMultiReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestMultiReader.java (revision 433317)
+++ src/test/org/apache/lucene/index/TestMultiReader.java (working copy)
@@ -42,8 +42,8 @@
DocHelper.writeDoc(dir, "seg-1", doc1);
DocHelper.writeDoc(dir, "seg-2", doc2);
sis.write(dir);
- reader1 = SegmentReader.get(new SegmentInfo("seg-1", 1, dir));
- reader2 = SegmentReader.get(new SegmentInfo("seg-2", 1, dir));
+ reader1 = SegmentReader.get(new SegmentInfo("seg-1", 1, dir), new DefaultIndexFormat());
+ reader2 = SegmentReader.get(new SegmentInfo("seg-2", 1, dir), new DefaultIndexFormat());
readers[0] = reader1;
readers[1] = reader2;
}
@@ -57,7 +57,7 @@
public void testDocument() throws IOException {
sis.read(dir);
- MultiReader reader = new MultiReader(dir, sis, false, readers);
+ MultiReader reader = new MultiReader(dir, new DefaultIndexFormat(), sis, false, readers);
assertTrue(reader != null);
Document newDoc1 = reader.document(0);
assertTrue(newDoc1 != null);
@@ -72,7 +72,7 @@
public void testUndeleteAll() throws IOException {
sis.read(dir);
- MultiReader reader = new MultiReader(dir, sis, false, readers);
+ MultiReader reader = new MultiReader(dir, new DefaultIndexFormat(), sis, false, readers);
assertTrue(reader != null);
assertEquals( 2, reader.numDocs() );
reader.deleteDocument(0);
@@ -83,7 +83,7 @@
public void testTermVectors() {
- MultiReader reader = new MultiReader(dir, sis, false, readers);
+ MultiReader reader = new MultiReader(dir, new DefaultIndexFormat(), sis, false, readers);
assertTrue(reader != null);
}
}
Index: src/test/org/apache/lucene/index/TestSegmentReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestSegmentReader.java (revision 433317)
+++ src/test/org/apache/lucene/index/TestSegmentReader.java (working copy)
@@ -41,7 +41,7 @@
protected void setUp() throws IOException {
DocHelper.setupDoc(testDoc);
DocHelper.writeDoc(dir, testDoc);
- reader = SegmentReader.get(new SegmentInfo("test", 1, dir));
+ reader = SegmentReader.get(new SegmentInfo("test", 1, dir), new DefaultIndexFormat());
}
protected void tearDown() {
@@ -75,7 +75,7 @@
Document docToDelete = new Document();
DocHelper.setupDoc(docToDelete);
DocHelper.writeDoc(dir, "seg-to-delete", docToDelete);
- SegmentReader deleteReader = SegmentReader.get(new SegmentInfo("seg-to-delete", 1, dir));
+ SegmentReader deleteReader = SegmentReader.get(new SegmentInfo("seg-to-delete", 1, dir), new DefaultIndexFormat());
assertTrue(deleteReader != null);
assertTrue(deleteReader.numDocs() == 1);
deleteReader.deleteDocument(0);
Index: src/test/org/apache/lucene/index/TestFieldsReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestFieldsReader.java (revision 433317)
+++ src/test/org/apache/lucene/index/TestFieldsReader.java (working copy)
@@ -49,7 +49,7 @@
fieldInfos = new FieldInfos();
DocHelper.setupDoc(testDoc);
fieldInfos.add(testDoc);
- DocumentWriter writer = new DocumentWriter(dir, new WhitespaceAnalyzer(),
+ DocumentWriter writer = new DocumentWriter(dir, new DefaultIndexFormat(), new WhitespaceAnalyzer(),
Similarity.getDefault(), 50);
assertTrue(writer != null);
writer.addDocument("test", testDoc);
@@ -167,7 +167,7 @@
File file = new File(path);
FSDirectory tmpDir = FSDirectory.getDirectory(file, true);
assertTrue(tmpDir != null);
- DocumentWriter writer = new DocumentWriter(tmpDir, new WhitespaceAnalyzer(),
+ DocumentWriter writer = new DocumentWriter(tmpDir, new DefaultIndexFormat(), new WhitespaceAnalyzer(),
Similarity.getDefault(), 50);
assertTrue(writer != null);
writer.addDocument("test", testDoc);
Index: src/java/org/apache/lucene/index/FieldInfo.java
===================================================================
--- src/java/org/apache/lucene/index/FieldInfo.java (revision 433317)
+++ src/java/org/apache/lucene/index/FieldInfo.java (working copy)
@@ -16,7 +16,9 @@
* limitations under the License.
*/
-final class FieldInfo {
+import org.apache.lucene.document.Field;
+
+public final class FieldInfo {
String name;
boolean isIndexed;
int number;
@@ -28,8 +30,8 @@
boolean omitNorms; // omit norms associated with indexed fields
- FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
- boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
+ FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
+ boolean omitNorms) {
name = na;
isIndexed = tk;
number = nu;
@@ -38,4 +40,28 @@
this.storePositionWithTermVector = storePositionWithTermVector;
this.omitNorms = omitNorms;
}
+
+ public String getName() {
+ return name;
+ }
+
+ public boolean omitNorms() {
+ return omitNorms;
+ }
+
+ public boolean isIndexed() {
+ return isIndexed;
+ }
+
+ public boolean storeOffsetWithTermVector() {
+ return storeOffsetWithTermVector;
+ }
+
+ public boolean storePositionWithTermVector() {
+ return storePositionWithTermVector;
+ }
+
+ public boolean storeTermVector() {
+ return storeTermVector;
+ }
}
Index: src/java/org/apache/lucene/index/MultiReader.java
===================================================================
--- src/java/org/apache/lucene/index/MultiReader.java (revision 433317)
+++ src/java/org/apache/lucene/index/MultiReader.java (working copy)
@@ -19,6 +19,7 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
import java.util.Collection;
@@ -43,17 +44,29 @@
* Directory locking for delete, undeleteAll, and setNorm operations is
* left to the subreaders. </p>
* <p>Note that all subreaders are closed if this Multireader is closed.</p>
+ *
* @param subReaders set of (sub)readers
+ * @param indexFormat the format definition of the index
* @throws IOException
*/
- public MultiReader(IndexReader[] subReaders) throws IOException {
- super(subReaders.length == 0 ? null : subReaders[0].directory());
+ public MultiReader(IndexReader[] subReaders, IndexFormat indexFormat) throws IOException {
+ super(subReaders.length == 0 ? null : subReaders[0].directory(), indexFormat);
initialize(subReaders);
}
+ /**
+ * Same constructor as the previous one; this one uses the default index format.
+ *
+ * @param subReaders set of (sub)readers
+ * @throws IOException
+ */
+ public MultiReader(IndexReader[] subReaders) throws IOException {
+ this(subReaders, new DefaultIndexFormat());
+ }
+
/** Construct reading the named set of readers. */
- MultiReader(Directory directory, SegmentInfos sis, boolean closeDirectory, IndexReader[] subReaders) {
- super(directory, sis, closeDirectory);
+ MultiReader(Directory directory, IndexFormat indexFormat, SegmentInfos sis, boolean closeDirectory, IndexReader[] subReaders) {
+ super(directory, indexFormat, sis, closeDirectory);
initialize(subReaders);
}
Index: src/java/org/apache/lucene/index/FieldData.java
===================================================================
--- src/java/org/apache/lucene/index/FieldData.java (revision 0)
+++ src/java/org/apache/lucene/index/FieldData.java (revision 0)
@@ -0,0 +1,176 @@
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.Serializable;
+
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Base class for the stored value of a document field. Subclasses define how
+ * the value is serialized and deserialized via writeData() and readData().
+ **/
+public abstract class FieldData implements Serializable {
+ private boolean isStored = true;
+
+ private boolean isTokenized = true;
+
+ private boolean isBinary = false;
+
+ private boolean lazy = false;
+
+ // the one and only data object for all different kind of field values
+ private Object fieldsData = null;
+
+ private long pointer;
+
+ private IndexInput fieldsStream;
+
+ private long toRead;
+
+ /**
+ * This constructor should only be called in an implementation
+ * of the FieldsReader or by a constructor of a subclass.
+ */
+ public FieldData() {
+ //nothing to initialize
+ }
+
+ public FieldData(String text) {
+ if (text == null)
+ throw new NullPointerException("value cannot be null");
+ fieldsData = text;
+ isBinary = false;
+ }
+
+ public FieldData(byte[] data) {
+ if (data == null)
+ throw new IllegalArgumentException("value cannot be null");
+ fieldsData = data;
+ isBinary = true;
+ }
+
+ public FieldData(Reader reader) {
+ if (reader == null)
+ throw new NullPointerException("reader cannot be null");
+ fieldsData = reader;
+ isBinary = false;
+ }
+
+ protected void setData(String text) {
+ fieldsData = text;
+ }
+
+ protected void setData(Reader reader) {
+ fieldsData = reader;
+ }
+
+ protected void setData(byte[] data) {
+ fieldsData = data;
+ }
+
+ /**
+ * True iff the value of the field should be tokenized as text prior to
+ * indexing. Un-tokenized fields are indexed as a single word and may not be
+ * Reader-valued. */
+ public final boolean isTokenized() {
+ return isTokenized;
+ }
+
+ /**
+ *
+ * True iff the value of the field is stored as binary
+ */
+ public final boolean isBinary() {
+ return isBinary;
+ }
+
+ public final boolean isLazy() {
+ return lazy;
+ }
+
+ public final boolean isStored() {
+ return isStored;
+ }
+
+ public void setBinary(boolean isBinary) {
+ this.isBinary = isBinary;
+ }
+
+ public void setStored(boolean isStored) {
+ this.isStored = isStored;
+ }
+
+ public void setTokenized(boolean isTokenized) {
+ this.isTokenized = isTokenized;
+ }
+
+ /**
+ * Prints a Field for human consumption.
+ */
+ public final String toString() {
+ if (fieldsData != null && !lazy) {
+ return fieldsData.toString();
+ }
+ return "<>";
+ }
+
+ public final String stringValue() {
+ if (lazy && fieldsData == null) {
+ readLazyData();
+ }
+ return fieldsData instanceof String ? (String) fieldsData : null;
+ }
+
+ public final Reader readerValue() {
+ if (lazy && fieldsData == null) {
+ readLazyData();
+ }
+ return fieldsData instanceof Reader ? (Reader) fieldsData : null;
+ }
+
+ public final byte[] binaryValue() {
+ if (lazy && fieldsData == null) {
+ readLazyData();
+ }
+ return fieldsData instanceof byte[] ? (byte[]) fieldsData : null;
+ }
+
+ void internalReadData(IndexInput in, boolean lazy) throws IOException {
+ this.lazy = lazy;
+ if (lazy) {
+ fieldsStream = in;
+ pointer = in.getFilePointer();
+ }
+ readData(in, lazy);
+ if (lazy) {
+ fieldsStream = in;
+ toRead = in.getFilePointer() - pointer;
+ }
+ }
+
+ abstract public void readData(IndexInput in, boolean skip) throws IOException;
+
+ void internalWriteData(IndexOutput out) throws IOException {
+ isStored = true;
+ if (lazy) {
+ fieldsStream.seek(pointer);
+ out.writeBytes(fieldsStream, toRead);
+ } else {
+ writeData(out);
+ }
+ }
+
+ abstract public void writeData(IndexOutput out) throws IOException;
+
+ private final void readLazyData() {
+ try {
+ fieldsStream.seek(pointer);
+ readData(fieldsStream, false);
+ } catch (IOException e) {
+ throw new FieldReaderException(e);
+ }
+ }
+
+}
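The readData()/writeData() pair above is the entire serialization contract: writeData() emits the value, and readData() must either materialize it or, when skip is true, just advance the stream past it (internalReadData() records the file pointer around the call to support lazy loading). A minimal sketch of a subclass, using a hypothetical fixed-width IntFieldData that is not part of this patch:

    package org.apache.lucene.index;

    import java.io.IOException;

    import org.apache.lucene.store.IndexInput;
    import org.apache.lucene.store.IndexOutput;

    // Hypothetical example, not part of this patch.
    public class IntFieldData extends FieldData {

      public IntFieldData() {
        // no-arg form, for use by a FieldsReader before readData() is called
      }

      public IntFieldData(int value) {
        super(Integer.toString(value));
      }

      public void readData(IndexInput in, boolean skip) throws IOException {
        // the value is fixed-width, so reading it is also the cheapest way to skip it
        int value = in.readInt();
        if (!skip)
          setData(Integer.toString(value));
      }

      public void writeData(IndexOutput out) throws IOException {
        out.writeInt(Integer.parseInt(stringValue()));
      }
    }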
Index: src/java/org/apache/lucene/index/IndexFormat.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFormat.java (revision 0)
+++ src/java/org/apache/lucene/index/IndexFormat.java (revision 0)
@@ -0,0 +1,13 @@
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+
+public interface IndexFormat {
+
+ FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException;
+
+ FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException;
+
+}
Index: src/java/org/apache/lucene/index/FieldInfos.java
===================================================================
--- src/java/org/apache/lucene/index/FieldInfos.java (revision 433317)
+++ src/java/org/apache/lucene/index/FieldInfos.java (working copy)
@@ -31,7 +31,7 @@
* be adding documents at a time, with no other reader or writer threads
* accessing this object.
*/
-final class FieldInfos {
+public final class FieldInfos {
static final byte IS_INDEXED = 0x1;
static final byte STORE_TERMVECTOR = 0x2;
Index: src/java/org/apache/lucene/index/DefaultFieldData.java
===================================================================
--- src/java/org/apache/lucene/index/DefaultFieldData.java (revision 0)
+++ src/java/org/apache/lucene/index/DefaultFieldData.java (revision 0)
@@ -0,0 +1,191 @@
+package org.apache.lucene.index;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.zip.DataFormatException;
+import java.util.zip.Deflater;
+import java.util.zip.Inflater;
+
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Default FieldData implementation matching the stock stored-field format:
+ * a flags byte (tokenized/binary/compressed) followed by the value, which is
+ * optionally Deflater-compressed.
+ **/
+public class DefaultFieldData extends FieldData {
+
+ private boolean isCompressed = false;
+
+ public DefaultFieldData() {
+
+ }
+
+ public DefaultFieldData(String data) {
+ super(data);
+ }
+
+ public DefaultFieldData(byte[] data) {
+ super(data);
+ }
+
+ public DefaultFieldData(Reader reader) {
+ super(reader);
+ }
+
+ /**
+ * True if the value of the field is stored and compressed within the index
+ */
+ public final boolean isCompressed() {
+ return isCompressed;
+ }
+
+ public void setCompressed(boolean isCompressed) {
+ this.isCompressed = isCompressed;
+ }
+
+ private static final byte FIELD_IS_TOKENIZED = 0x1;
+
+ private static final byte FIELD_IS_BINARY = 0x2;
+
+ private static final byte FIELD_IS_COMPRESSED = 0x4;
+
+ public void readData(IndexInput in, boolean skip) throws IOException {
+ byte bits = in.readByte();
+ isCompressed = (bits & FIELD_IS_COMPRESSED) != 0;
+ setTokenized((bits & FIELD_IS_TOKENIZED) != 0);
+ setBinary((bits & FIELD_IS_BINARY) != 0);
+
+ if (skip) {
+ int toRead = in.readVInt();
+ if (isBinary() || isCompressed()) {
+ long pointer = in.getFilePointer();
+ //Need to move the pointer ahead by toRead positions
+ in.seek(pointer + toRead);
+ } else {
+ //Skip ahead of where we are by the length of what is stored
+ in.skipChars(toRead);
+ }
+ } else {
+ if (isBinary()) {
+ int toRead = in.readVInt();
+ final byte[] b = new byte[toRead];
+ in.readBytes(b, 0, b.length);
+ if (isCompressed()) {
+ setData(uncompress(b));
+ } else {
+ setData(b);
+ }
+ } else {
+ if (isCompressed()) {
+ int toRead = in.readVInt();
+ final byte[] b = new byte[toRead];
+ in.readBytes(b, 0, b.length);
+ setData(new String(uncompress(b), "UTF-8"));
+ } else {
+ setData(in.readString()); // read value
+ }
+ }
+ }
+ }
+
+ public void writeData(IndexOutput out) throws IOException {
+ byte bits = 0;
+ if (isTokenized())
+ bits |= FIELD_IS_TOKENIZED;
+ if (isBinary())
+ bits |= FIELD_IS_BINARY;
+ if (isCompressed())
+ bits |= FIELD_IS_COMPRESSED;
+
+ out.writeByte(bits);
+
+ if (isCompressed()) {
+ // compression is enabled for the current field
+ byte[] data = null;
+ // check if it is a binary field
+ if (isBinary()) {
+ data = compress(binaryValue());
+ } else {
+ data = compress(stringValue().getBytes("UTF-8"));
+ }
+ final int len = data.length;
+ out.writeVInt(len);
+ out.writeBytes(data, len);
+ } else {
+ // compression is disabled for the current field
+ if (isBinary()) {
+ byte[] data = binaryValue();
+ final int len = data.length;
+ out.writeVInt(len);
+ out.writeBytes(data, len);
+ } else {
+ out.writeString(stringValue());
+ }
+ }
+ }
+
+ protected int getCompressionLevel() {
+ return Deflater.BEST_COMPRESSION;
+ }
+
+ protected byte[] compress(byte[] input) {
+
+ // Create the compressor with highest level of compression
+ Deflater compressor = new Deflater();
+ compressor.setLevel(getCompressionLevel());
+
+ // Give the compressor the data to compress
+ compressor.setInput(input);
+ compressor.finish();
+
+ /*
+ * Create an expandable byte array to hold the compressed data.
+ * You cannot use an array that's the same size as the original because
+ * there is no guarantee that the compressed data will be smaller than
+ * the uncompressed data.
+ */
+ ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
+
+ // Compress the data
+ byte[] buf = new byte[1024];
+ while (!compressor.finished()) {
+ int count = compressor.deflate(buf);
+ bos.write(buf, 0, count);
+ }
+
+ compressor.end();
+
+ // Get the compressed data
+ return bos.toByteArray();
+ }
+
+ protected byte[] uncompress(final byte[] input) throws IOException {
+
+ Inflater decompressor = new Inflater();
+ decompressor.setInput(input);
+
+ // Create an expandable byte array to hold the decompressed data
+ ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
+
+ // Decompress the data
+ byte[] buf = new byte[1024];
+ while (!decompressor.finished()) {
+ try {
+ int count = decompressor.inflate(buf);
+ bos.write(buf, 0, count);
+ } catch (DataFormatException e) {
+ // this will happen if the field is not compressed
+ IOException newException = new IOException("field data are in wrong format: " + e.toString());
+ newException.initCause(e);
+ throw newException;
+ }
+ }
+
+ decompressor.end();
+
+ // Get the decompressed data
+ return bos.toByteArray();
+ }
+}
Index: src/java/org/apache/lucene/index/DefaultIndexFormat.java
===================================================================
--- src/java/org/apache/lucene/index/DefaultIndexFormat.java (revision 0)
+++ src/java/org/apache/lucene/index/DefaultIndexFormat.java (revision 0)
@@ -0,0 +1,17 @@
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+
+public class DefaultIndexFormat implements IndexFormat {
+
+ public FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+ return new FieldsReader(d, segment, fn);
+ }
+
+ public FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
+ return new FieldsWriter(d, segment, fn);
+ }
+
+}
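DefaultIndexFormat above is the stock wiring. A custom format only needs to hand back its own FieldsReader/FieldsWriter; here is a sketch (hypothetical MyIndexFormat, placed in org.apache.lucene.index because the FieldsReader and FieldsWriter constructors are package-private) that overrides the createFieldData() hook via an anonymous subclass:

    package org.apache.lucene.index;

    import java.io.IOException;

    import org.apache.lucene.store.Directory;

    // Hypothetical example, not part of this patch.
    public class MyIndexFormat implements IndexFormat {

      public FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
        // anonymous subclass: only the createFieldData() hook is overridden
        return new FieldsReader(d, segment, fn) {
          protected FieldData createFieldData(String fieldName) {
            return new DefaultFieldData(); // return a custom FieldData here
          }
        };
      }

      public FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
        return new FieldsWriter(d, segment, fn);
      }
    }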
Index: src/java/org/apache/lucene/index/FieldsReader.java
===================================================================
--- src/java/org/apache/lucene/index/FieldsReader.java (revision 433317)
+++ src/java/org/apache/lucene/index/FieldsReader.java (working copy)
@@ -16,13 +16,8 @@
* limitations under the License.
*/
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.io.Reader;
-import java.util.zip.DataFormatException;
-import java.util.zip.Inflater;
-import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
@@ -38,14 +33,12 @@
*
* @version $Id$
*/
-final class FieldsReader {
+public class FieldsReader {
private FieldInfos fieldInfos;
private IndexInput fieldsStream;
private IndexInput indexStream;
private int size;
- private static ThreadLocal fieldsStreamTL = new ThreadLocal();
-
FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
fieldInfos = fn;
@@ -60,14 +53,9 @@
*
* @throws IOException
*/
- final void close() throws IOException {
+ protected void close() throws IOException {
fieldsStream.close();
indexStream.close();
- IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
- if (localFieldsStream != null) {
- localFieldsStream.close();
- fieldsStreamTL.set(null);
- }
}
final int size() {
@@ -79,352 +67,39 @@
long position = indexStream.readLong();
fieldsStream.seek(position);
- Document doc = new Document();
+ Document doc = createDocument(fieldsStream);
+
int numFields = fieldsStream.readVInt();
for (int i = 0; i < numFields; i++) {
int fieldNumber = fieldsStream.readVInt();
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
- boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true;
- byte bits = fieldsStream.readByte();
- boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
- boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
- boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
- if (acceptField.equals(FieldSelectorResult.LOAD) == true) {
- addField(doc, fi, binary, compressed, tokenize);
- }
- else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE) == true) {
- addFieldForMerge(doc, fi, binary, compressed, tokenize);
- }
- else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){
- addField(doc, fi, binary, compressed, tokenize);
- break;//Get out of this loop
- }
- else if (lazy == true){
- addFieldLazy(doc, fi, binary, compressed, tokenize);
- }
- else {
- skipField(binary, compressed);
- }
- }
+ FieldData fieldData = createFieldData(fi.name);
+ fieldData.internalReadData(fieldsStream, acceptField.equals(FieldSelectorResult.LAZY_LOAD) || acceptField.equals(FieldSelectorResult.NO_LOAD));
- return doc;
- }
+ Fieldable field = createField(fi, fieldData);
- /**
- * Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
- * This will have the most payoff on large fields.
- */
- private void skipField(boolean binary, boolean compressed) throws IOException {
-
- int toRead = fieldsStream.readVInt();
-
- if (binary || compressed) {
- long pointer = fieldsStream.getFilePointer();
- fieldsStream.seek(pointer + toRead);
- } else {
- //We need to skip chars. This will slow us down, but still better
- fieldsStream.skipChars(toRead);
- }
- }
-
- private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
- if (binary == true) {
- int toRead = fieldsStream.readVInt();
- long pointer = fieldsStream.getFilePointer();
- if (compressed) {
- //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
- doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer));
- } else {
- //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
- doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer));
+ if (!acceptField.equals(FieldSelectorResult.NO_LOAD)) {
+ doc.add(field);
}
- //Need to move the pointer ahead by toRead positions
- fieldsStream.seek(pointer + toRead);
- } else {
- Field.Store store = Field.Store.YES;
- Field.Index index = getIndexType(fi, tokenize);
- Field.TermVector termVector = getTermVectorType(fi);
-
- Fieldable f;
- if (compressed) {
- store = Field.Store.COMPRESS;
- int toRead = fieldsStream.readVInt();
- long pointer = fieldsStream.getFilePointer();
- f = new LazyField(fi.name, store, toRead, pointer);
- //skip over the part that we aren't loading
- fieldsStream.seek(pointer + toRead);
- f.setOmitNorms(fi.omitNorms);
- } else {
- int length = fieldsStream.readVInt();
- long pointer = fieldsStream.getFilePointer();
- //Skip ahead of where we are by the length of what is stored
- fieldsStream.skipChars(length);
- f = new LazyField(fi.name, store, index, termVector, length, pointer);
- f.setOmitNorms(fi.omitNorms);
+ if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)) {
+ break;
}
- doc.add(f);
}
+ return doc;
}
- // in merge mode we don't uncompress the data of a compressed field
- private void addFieldForMerge(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
- Object data;
-
- if (binary || compressed) {
- int toRead = fieldsStream.readVInt();
- final byte[] b = new byte[toRead];
- fieldsStream.readBytes(b, 0, b.length);
- data = b;
- } else {
- data = fieldsStream.readString();
- }
-
- doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
+ protected Document createDocument(IndexInput fieldsStream) {
+ return new Document();
}
-
- private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
- //we have a binary stored field, and it may be compressed
- if (binary) {
- int toRead = fieldsStream.readVInt();
- final byte[] b = new byte[toRead];
- fieldsStream.readBytes(b, 0, b.length);
- if (compressed)
- doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
- else
- doc.add(new Field(fi.name, b, Field.Store.YES));
-
- } else {
- Field.Store store = Field.Store.YES;
- Field.Index index = getIndexType(fi, tokenize);
- Field.TermVector termVector = getTermVectorType(fi);
-
- Fieldable f;
- if (compressed) {
- store = Field.Store.COMPRESS;
- int toRead = fieldsStream.readVInt();
-
- final byte[] b = new byte[toRead];
- fieldsStream.readBytes(b, 0, b.length);
- f = new Field(fi.name, // field name
- new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
- store,
- index,
- termVector);
- f.setOmitNorms(fi.omitNorms);
- } else {
- f = new Field(fi.name, // name
- fieldsStream.readString(), // read value
- store,
- index,
- termVector);
- f.setOmitNorms(fi.omitNorms);
- }
- doc.add(f);
- }
+ protected FieldData createFieldData(String fieldName) {
+ return new DefaultFieldData();
}
- private Field.TermVector getTermVectorType(FieldInfo fi) {
- Field.TermVector termVector = null;
- if (fi.storeTermVector) {
- if (fi.storeOffsetWithTermVector) {
- if (fi.storePositionWithTermVector) {
- termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
- } else {
- termVector = Field.TermVector.WITH_OFFSETS;
- }
- } else if (fi.storePositionWithTermVector) {
- termVector = Field.TermVector.WITH_POSITIONS;
- } else {
- termVector = Field.TermVector.YES;
- }
- } else {
- termVector = Field.TermVector.NO;
- }
- return termVector;
+ protected Fieldable createField(FieldInfo fi, FieldData data) {
+ return new Field(fi, data);
}
-
- private Field.Index getIndexType(FieldInfo fi, boolean tokenize) {
- Field.Index index;
- if (fi.isIndexed && tokenize)
- index = Field.Index.TOKENIZED;
- else if (fi.isIndexed && !tokenize)
- index = Field.Index.UN_TOKENIZED;
- else
- index = Field.Index.NO;
- return index;
- }
-
- /**
- * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is
- * loaded.
- */
- private class LazyField extends AbstractField implements Fieldable {
- private int toRead;
- private long pointer;
- //internal buffer
- private char[] chars;
-
-
- public LazyField(String name, Field.Store store, int toRead, long pointer) {
- super(name, store, Field.Index.NO, Field.TermVector.NO);
- this.toRead = toRead;
- this.pointer = pointer;
- lazy = true;
- }
-
- public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) {
- super(name, store, index, termVector);
- this.toRead = toRead;
- this.pointer = pointer;
- lazy = true;
- }
-
- /**
- * The value of the field in Binary, or null. If null, the Reader or
- * String value is used. Exactly one of stringValue(), readerValue() and
- * binaryValue() must be set.
- */
- public byte[] binaryValue() {
- if (fieldsData == null) {
- final byte[] b = new byte[toRead];
- IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
- if (localFieldsStream == null) {
- localFieldsStream = (IndexInput) fieldsStream.clone();
- fieldsStreamTL.set(localFieldsStream);
- }
- //Throw this IO Exception since IndexREader.document does so anyway, so probably not that big of a change for people
- //since they are already handling this exception when getting the document
- try {
- localFieldsStream.seek(pointer);
- localFieldsStream.readBytes(b, 0, b.length);
- if (isCompressed == true) {
- fieldsData = uncompress(b);
- } else {
- fieldsData = b;
- }
- } catch (IOException e) {
- throw new FieldReaderException(e);
- }
- }
- return fieldsData instanceof byte[] ? (byte[]) fieldsData : null;
- }
-
- /**
- * The value of the field as a Reader, or null. If null, the String value
- * or binary value is used. Exactly one of stringValue(), readerValue(),
- * and binaryValue() must be set.
- */
- public Reader readerValue() {
- return fieldsData instanceof Reader ? (Reader) fieldsData : null;
- }
-
- /**
- * The value of the field as a String, or null. If null, the Reader value
- * or binary value is used. Exactly one of stringValue(), readerValue(), and
- * binaryValue() must be set.
- */
- public String stringValue() {
- if (fieldsData == null) {
- IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
- if (localFieldsStream == null) {
- localFieldsStream = (IndexInput) fieldsStream.clone();
- fieldsStreamTL.set(localFieldsStream);
- }
- try {
- localFieldsStream.seek(pointer);
- //read in chars b/c we already know the length we need to read
- if (chars == null || toRead > chars.length)
- chars = new char[toRead];
- localFieldsStream.readChars(chars, 0, toRead);
- fieldsData = new String(chars, 0, toRead);//fieldsStream.readString();
- } catch (IOException e) {
- throw new FieldReaderException(e);
- }
- }
- return fieldsData instanceof String ? (String) fieldsData : null;
- }
-
- public long getPointer() {
- return pointer;
- }
-
- public void setPointer(long pointer) {
- this.pointer = pointer;
- }
-
- public int getToRead() {
- return toRead;
- }
-
- public void setToRead(int toRead) {
- this.toRead = toRead;
- }
- }
-
- private final byte[] uncompress(final byte[] input)
- throws IOException {
-
- Inflater decompressor = new Inflater();
- decompressor.setInput(input);
-
- // Create an expandable byte array to hold the decompressed data
- ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
-
- // Decompress the data
- byte[] buf = new byte[1024];
- while (!decompressor.finished()) {
- try {
- int count = decompressor.inflate(buf);
- bos.write(buf, 0, count);
- }
- catch (DataFormatException e) {
- // this will happen if the field is not compressed
- IOException newException = new IOException("field data are in wrong format: " + e.toString());
- newException.initCause(e);
- throw newException;
- }
- }
-
- decompressor.end();
-
- // Get the decompressed data
- return bos.toByteArray();
- }
-
- // Instances of this class hold field properties and data
- // for merge
- final static class FieldForMerge extends AbstractField {
- public String stringValue() {
- return (String) this.fieldsData;
- }
-
- public Reader readerValue() {
- // not needed for merge
- return null;
- }
-
- public byte[] binaryValue() {
- return (byte[]) this.fieldsData;
- }
-
- public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) {
- this.isStored = true;
- this.fieldsData = value;
- this.isCompressed = compressed;
- this.isBinary = binary;
- this.isTokenized = tokenize;
-
- this.name = fi.name.intern();
- this.isIndexed = fi.isIndexed;
- this.omitNorms = fi.omitNorms;
- this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
- this.storePositionWithTermVector = fi.storePositionWithTermVector;
- this.storeTermVector = fi.storeTermVector;
- }
-
- }
}
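The three protected hooks introduced here (createDocument(), createFieldData(), createField()) replace the old hard-coded decoding. As a purely illustrative sketch, a hypothetical reader that consumes one extra per-document byte (the matching writer is sketched under FieldsWriter below); createDocument() declares no IOException, so stream errors are wrapped in the unchecked FieldReaderException:

    package org.apache.lucene.index;

    import java.io.IOException;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IndexInput;

    // Hypothetical example, not part of this patch.
    public class BoostFieldsReader extends FieldsReader {

      BoostFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
        super(d, segment, fn);
      }

      protected Document createDocument(IndexInput fieldsStream) {
        Document doc = new Document();
        try {
          // read back the per-document byte written by the matching writer
          doc.setBoost(fieldsStream.readByte());
        } catch (IOException e) {
          throw new FieldReaderException(e);
        }
        return doc;
      }
    }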
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java (revision 433317)
+++ src/java/org/apache/lucene/index/IndexReader.java (working copy)
@@ -22,6 +22,7 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import java.io.File;
@@ -86,8 +87,9 @@
*
* @param directory Directory where IndexReader files reside.
*/
- protected IndexReader(Directory directory) {
+ protected IndexReader(Directory directory, IndexFormat indexFormat) {
this.directory = directory;
+ this.indexFormat = indexFormat;
}
/**
@@ -98,18 +100,20 @@
* @param segmentInfos Used for write-l
* @param closeDirectory
*/
- IndexReader(Directory directory, SegmentInfos segmentInfos, boolean closeDirectory) {
- init(directory, segmentInfos, closeDirectory, true);
+ IndexReader(Directory directory, IndexFormat indexFormat, SegmentInfos segmentInfos, boolean closeDirectory) {
+ init(directory, indexFormat, segmentInfos, closeDirectory, true);
}
- void init(Directory directory, SegmentInfos segmentInfos, boolean closeDirectory, boolean directoryOwner) {
+ void init(Directory directory, IndexFormat indexFormat, SegmentInfos segmentInfos, boolean closeDirectory, boolean directoryOwner) {
this.directory = directory;
+ this.indexFormat = indexFormat;
this.segmentInfos = segmentInfos;
this.directoryOwner = directoryOwner;
this.closeDirectory = closeDirectory;
}
private Directory directory;
+ private IndexFormat indexFormat;
private boolean directoryOwner;
private boolean closeDirectory;
@@ -122,21 +126,26 @@
/** Returns an IndexReader reading the index in an FSDirectory in the named
path. */
public static IndexReader open(String path) throws IOException {
- return open(FSDirectory.getDirectory(path, false), true);
+ return open(FSDirectory.getDirectory(path, false), new DefaultIndexFormat(), true);
}
/** Returns an IndexReader reading the index in an FSDirectory in the named
path. */
public static IndexReader open(File path) throws IOException {
- return open(FSDirectory.getDirectory(path, false), true);
+ return open(FSDirectory.getDirectory(path, false), new DefaultIndexFormat(), true);
}
/** Returns an IndexReader reading the index in the given Directory. */
public static IndexReader open(final Directory directory) throws IOException {
- return open(directory, false);
+ return open(directory, new DefaultIndexFormat(), false);
}
- private static IndexReader open(final Directory directory, final boolean closeDirectory) throws IOException {
+ /** Returns an IndexReader reading the index in the given Directory, using the given index format. */
+ public static IndexReader open(final Directory directory, IndexFormat indexFormat) throws IOException {
+ return open(directory, indexFormat, false);
+ }
+
+ private static IndexReader open(final Directory directory, final IndexFormat indexFormat, final boolean closeDirectory) throws IOException {
synchronized (directory) { // in- & inter-process sync
return (IndexReader)new Lock.With(
directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
@@ -145,12 +154,12 @@
SegmentInfos infos = new SegmentInfos();
infos.read(directory);
if (infos.size() == 1) { // index is optimized
- return SegmentReader.get(infos, infos.info(0), closeDirectory);
+ return SegmentReader.get(infos, indexFormat, infos.info(0), closeDirectory);
}
IndexReader[] readers = new IndexReader[infos.size()];
for (int i = 0; i < infos.size(); i++)
- readers[i] = SegmentReader.get(infos.info(i));
- return new MultiReader(directory, infos, closeDirectory, readers);
+ readers[i] = SegmentReader.get(infos.info(i), indexFormat);
+ return new MultiReader(directory, indexFormat, infos, closeDirectory, readers);
}
}.run();
@@ -161,6 +170,13 @@
public Directory directory() { return directory; }
/**
+ * @return the index format used by this reader
+ */
+ public IndexFormat getIndexFormat() {
+ return indexFormat;
+ }
+
+ /**
* Returns the time the index in the named directory was last modified.
* Do not use this to check whether the reader is still up-to-date, use
* {@link #isCurrent()} instead.
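Usage of the new open(Directory, IndexFormat) overload is a one-liner; open(dir) is equivalent to passing a DefaultIndexFormat. A minimal sketch:

    import java.io.IOException;

    import org.apache.lucene.index.DefaultIndexFormat;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class OpenWithFormat {
      public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.getDirectory(args[0], false);
        // same as IndexReader.open(dir), with the format spelled out
        IndexReader reader = IndexReader.open(dir, new DefaultIndexFormat());
        System.out.println("docs: " + reader.numDocs());
        reader.close();
      }
    }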
Index: src/java/org/apache/lucene/index/FilterIndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/FilterIndexReader.java (revision 433317)
+++ src/java/org/apache/lucene/index/FilterIndexReader.java (working copy)
@@ -18,6 +18,7 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
@@ -85,7 +86,17 @@
* @param in specified base reader.
*/
public FilterIndexReader(IndexReader in) {
- super(in.directory());
+ this(in, new DefaultIndexFormat());
+ }
+
+ /**
+ * Same constructor as the previous one, with a custom index format.
+ *
+ * @param in specified base reader
+ * @param indexFormat the format to use
+ */
+ public FilterIndexReader(IndexReader in, IndexFormat indexFormat) {
+ super(in.directory(), indexFormat);
this.in = in;
}
Index: src/java/org/apache/lucene/index/FieldsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/FieldsWriter.java (revision 433317)
+++ src/java/org/apache/lucene/index/FieldsWriter.java (working copy)
@@ -16,10 +16,8 @@
* the License.
*/
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.util.Enumeration;
-import java.util.zip.Deflater;
+import java.util.Iterator;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -27,12 +25,8 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
-final class FieldsWriter
-{
- static final byte FIELD_IS_TOKENIZED = 0x1;
- static final byte FIELD_IS_BINARY = 0x2;
- static final byte FIELD_IS_COMPRESSED = 0x4;
-
+public class FieldsWriter {
+
private FieldInfos fieldInfos;
private IndexOutput fieldsStream;
@@ -53,100 +47,28 @@
final void addDocument(Document doc) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
+ writeDocumentData(fieldsStream, doc);
+
int storedCount = 0;
- Enumeration fields = doc.fields();
- while (fields.hasMoreElements()) {
- Fieldable field = (Fieldable) fields.nextElement();
+ Iterator fields = doc.getFields().iterator();
+ while (fields.hasNext()) {
+ Fieldable field = (Fieldable) fields.next();
if (field.isStored())
storedCount++;
}
fieldsStream.writeVInt(storedCount);
- fields = doc.fields();
- while (fields.hasMoreElements()) {
- Fieldable field = (Fieldable) fields.nextElement();
- // if the field as an instanceof FieldsReader.FieldForMerge, we're in merge mode
- // and field.binaryValue() already returns the compressed value for a field
- // with isCompressed()==true, so we disable compression in that case
- boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
+ fields = doc.getFields().iterator();
+ while (fields.hasNext()) {
+ Fieldable field = (Fieldable) fields.next();
if (field.isStored()) {
fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name()));
-
- byte bits = 0;
- if (field.isTokenized())
- bits |= FieldsWriter.FIELD_IS_TOKENIZED;
- if (field.isBinary())
- bits |= FieldsWriter.FIELD_IS_BINARY;
- if (field.isCompressed())
- bits |= FieldsWriter.FIELD_IS_COMPRESSED;
-
- fieldsStream.writeByte(bits);
-
- if (field.isCompressed()) {
- // compression is enabled for the current field
- byte[] data = null;
-
- if (disableCompression) {
- // optimized case for merging, the data
- // is already compressed
- data = field.binaryValue();
- } else {
- // check if it is a binary field
- if (field.isBinary()) {
- data = compress(field.binaryValue());
- }
- else {
- data = compress(field.stringValue().getBytes("UTF-8"));
- }
- }
- final int len = data.length;
- fieldsStream.writeVInt(len);
- fieldsStream.writeBytes(data, len);
- }
- else {
- // compression is disabled for the current field
- if (field.isBinary()) {
- byte[] data = field.binaryValue();
- final int len = data.length;
- fieldsStream.writeVInt(len);
- fieldsStream.writeBytes(data, len);
- }
- else {
- fieldsStream.writeString(field.stringValue());
- }
- }
+ field.getData().internalWriteData(fieldsStream);
}
}
}
- private final byte[] compress (byte[] input) {
-
- // Create the compressor with highest level of compression
- Deflater compressor = new Deflater();
- compressor.setLevel(Deflater.BEST_COMPRESSION);
-
- // Give the compressor the data to compress
- compressor.setInput(input);
- compressor.finish();
-
- /*
- * Create an expandable byte array to hold the compressed data.
- * You cannot use an array that's the same size as the orginal because
- * there is no guarantee that the compressed data will be smaller than
- * the uncompressed data.
- */
- ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
-
- // Compress the data
- byte[] buf = new byte[1024];
- while (!compressor.finished()) {
- int count = compressor.deflate(buf);
- bos.write(buf, 0, count);
- }
-
- compressor.end();
-
- // Get the compressed data
- return bos.toByteArray();
+ protected void writeDocumentData(IndexOutput out, Document doc) throws IOException {
+ //nothing to write
}
}
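writeDocumentData() is the writer-side twin of FieldsReader.createDocument(): it runs once per document, after the index pointer is written and before the stored-field count. The writer half of the hypothetical boost example sketched under FieldsReader above:

    package org.apache.lucene.index;

    import java.io.IOException;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IndexOutput;

    // Hypothetical example, not part of this patch.
    public class BoostFieldsWriter extends FieldsWriter {

      BoostFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
        super(d, segment, fn);
      }

      protected void writeDocumentData(IndexOutput out, Document doc) throws IOException {
        // toy encoding: store the document boost as a single byte
        out.writeByte((byte) doc.getBoost());
      }
    }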
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java (revision 433317)
+++ src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -123,6 +123,8 @@
private boolean closeDir;
+ private IndexFormat indexFormat;
+
/** Get the current setting of whether to use the compound file format.
* Note that this just returns the value you set with setUseCompoundFile(boolean)
* or the default. You cannot use this to query the status of an existing index.
@@ -204,7 +206,7 @@
*/
public IndexWriter(String path, Analyzer a, boolean create)
throws IOException {
- this(FSDirectory.getDirectory(path, create), a, create, true);
+ this(FSDirectory.getDirectory(path, create), new DefaultIndexFormat(), a, create, true);
}
/**
@@ -224,10 +226,32 @@
*/
public IndexWriter(File path, Analyzer a, boolean create)
throws IOException {
- this(FSDirectory.getDirectory(path, create), a, create, true);
+ this(FSDirectory.getDirectory(path, create), new DefaultIndexFormat(), a, create, true);
}
/**
+ * Constructs an IndexWriter for the index in <code>d</code> in a
+ * custom format.
+ * Text will be analyzed with <code>a</code>. If <code>create</code>
+ * is true, then a new, empty index will be created in
+ * <code>d</code>, replacing the index already there, if any.
+ *
+ * @param d the index directory
+ * @param indexFormat the format to use
+ * @param a the analyzer to use
+ * @param create <code>true</code> to create the index or overwrite
+ * the existing one; <code>false</code> to append to the existing
+ * index
+ * @throws IOException if the directory cannot be read/written to, or
+ * if it does not exist, and <code>create</code> is
+ * <code>false</code>
+ */
+ public IndexWriter(Directory d, IndexFormat indexFormat, Analyzer a, boolean create)
+ throws IOException {
+ this(d, indexFormat, a, create, false);
+ }
+
+ /**
* Constructs an IndexWriter for the index in <code>d</code>.
* Text will be analyzed with <code>a</code>. If <code>create</code>
* is true, then a new, empty index will be created in
@@ -244,15 +268,16 @@
*/
public IndexWriter(Directory d, Analyzer a, boolean create)
throws IOException {
- this(d, a, create, false);
+ this(d, new DefaultIndexFormat(), a, create, false);
}
- private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir)
+ private IndexWriter(Directory d, IndexFormat indexFormat, Analyzer a, final boolean create, boolean closeDir)
throws IOException {
this.closeDir = closeDir;
directory = d;
analyzer = a;
-
+ this.indexFormat = indexFormat;
+
Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME);
if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
throw new IOException("Index locked for write: " + writeLock);
@@ -505,7 +530,7 @@
*/
public void addDocument(Document doc, Analyzer analyzer) throws IOException {
DocumentWriter dw =
- new DocumentWriter(ramDirectory, analyzer, this);
+ new DocumentWriter(ramDirectory, indexFormat, analyzer, this);
dw.setInfoStream(infoStream);
String segmentName = newSegmentName();
dw.addDocument(segmentName, doc);
@@ -628,12 +653,12 @@
optimize(); // start with zero or 1 seg
final String mergedName = newSegmentName();
- SegmentMerger merger = new SegmentMerger(this, mergedName);
+ SegmentMerger merger = new SegmentMerger(this, indexFormat, mergedName);
final Vector segmentsToDelete = new Vector();
IndexReader sReader = null;
if (segmentInfos.size() == 1){ // add existing index, if any
- sReader = SegmentReader.get(segmentInfos.info(0));
+ sReader = SegmentReader.get(segmentInfos.info(0), indexFormat);
merger.add(sReader);
segmentsToDelete.addElement(sReader); // queue segment for deletion
}
@@ -733,14 +758,14 @@
throws IOException {
final String mergedName = newSegmentName();
if (infoStream != null) infoStream.print("merging segments");
- SegmentMerger merger = new SegmentMerger(this, mergedName);
+ SegmentMerger merger = new SegmentMerger(this, indexFormat, mergedName);
final Vector segmentsToDelete = new Vector();
for (int i = minSegment; i < end; i++) {
SegmentInfo si = segmentInfos.info(i);
if (infoStream != null)
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
- IndexReader reader = SegmentReader.get(si);
+ IndexReader reader = SegmentReader.get(si, indexFormat);
merger.add(reader);
if ((reader.directory() == this.directory) || // if we own the directory
(reader.directory() == this.ramDirectory))
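On the writing side, a minimal sketch of the new explicit-format constructor (RAMDirectory and SimpleAnalyzer are used only to keep the example self-contained):

    import java.io.IOException;

    import org.apache.lucene.analysis.SimpleAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.DefaultIndexFormat;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class WriteWithFormat {
      public static void main(String[] args) throws IOException {
        Directory dir = new RAMDirectory();
        // explicit-format constructor added by this patch
        IndexWriter writer = new IndexWriter(dir, new DefaultIndexFormat(), new SimpleAnalyzer(), true);
        Document doc = new Document();
        doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
        writer.addDocument(doc);
        writer.close();
      }
    }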
Index: src/java/org/apache/lucene/index/SegmentMerger.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentMerger.java (revision 433317)
+++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy)
@@ -45,18 +45,21 @@
private Vector readers = new Vector();
private FieldInfos fieldInfos;
+ private final IndexFormat indexFormat;
- /** This ctor used only by test code.
+ /** This constructor is used only by test code.
*
* @param dir The Directory to merge the other segments into
* @param name The name of the new segment
*/
SegmentMerger(Directory dir, String name) {
directory = dir;
+ indexFormat = new DefaultIndexFormat();
segment = name;
}
- SegmentMerger(IndexWriter writer, String name) {
+ SegmentMerger(IndexWriter writer, IndexFormat indexFormat, String name) {
+ this.indexFormat = indexFormat;
directory = writer.getDirectory();
segment = name;
termIndexInterval = writer.getTermIndexInterval();
@@ -177,14 +180,13 @@
}
fieldInfos.write(directory, segment + ".fnm");
- FieldsWriter fieldsWriter = // merge field values
- new FieldsWriter(directory, segment, fieldInfos);
+ FieldsWriter fieldsWriter = indexFormat.getFieldsWriter(directory, segment, fieldInfos);
// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
// in merge mode, we use this FieldSelector
FieldSelector fieldSelectorMerge = new FieldSelector() {
public FieldSelectorResult accept(String fieldName) {
- return FieldSelectorResult.LOAD_FOR_MERGE;
+ return FieldSelectorResult.LAZY_LOAD;
}
};
Index: src/java/org/apache/lucene/index/DocumentWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DocumentWriter.java (revision 433317)
+++ src/java/org/apache/lucene/index/DocumentWriter.java (working copy)
@@ -41,24 +41,28 @@
private int maxFieldLength;
private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
private PrintStream infoStream;
+ private final IndexFormat indexFormat;
/** This ctor used by test code only.
*
* @param directory The directory to write the document information to
+ * @param indexFormat the index format to use
* @param analyzer The analyzer to use for the document
* @param similarity The Similarity function
* @param maxFieldLength The maximum number of tokens a field may have
*/
- DocumentWriter(Directory directory, Analyzer analyzer,
+ DocumentWriter(Directory directory, IndexFormat indexFormat, Analyzer analyzer,
Similarity similarity, int maxFieldLength) {
this.directory = directory;
+ this.indexFormat = indexFormat;
this.analyzer = analyzer;
this.similarity = similarity;
this.maxFieldLength = maxFieldLength;
}
- DocumentWriter(Directory directory, Analyzer analyzer, IndexWriter writer) {
+ DocumentWriter(Directory directory, IndexFormat indexFormat, Analyzer analyzer, IndexWriter writer) {
this.directory = directory;
+ this.indexFormat = indexFormat;
this.analyzer = analyzer;
this.similarity = writer.getSimilarity();
this.maxFieldLength = writer.getMaxFieldLength();
@@ -73,8 +77,7 @@
fieldInfos.write(directory, segment + ".fnm");
// write field values
- FieldsWriter fieldsWriter =
- new FieldsWriter(directory, segment, fieldInfos);
+ FieldsWriter fieldsWriter = indexFormat.getFieldsWriter(directory, segment, fieldInfos);
try {
fieldsWriter.addDocument(doc);
} finally {
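Note: with the widened constructors, callers supply the format explicitly. A minimal sketch of how the test-only constructor would now be invoked, assuming DefaultIndexFormat is the stock implementation and that directory, analyzer, and doc already exist:

    DocumentWriter dw = new DocumentWriter(directory, new DefaultIndexFormat(),
        analyzer, Similarity.getDefault(), IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    dw.addDocument("_test", doc); // writes the document into segment "_test"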
Index: src/java/org/apache/lucene/index/ParallelReader.java
===================================================================
--- src/java/org/apache/lucene/index/ParallelReader.java (revision 433317)
+++ src/java/org/apache/lucene/index/ParallelReader.java (working copy)
@@ -20,6 +20,8 @@
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.util.SortedMap;
@@ -63,7 +65,7 @@
private boolean hasDeletions;
/** Construct a ParallelReader. */
- public ParallelReader() throws IOException { super(null); }
+ public ParallelReader() throws IOException { super(null, new DefaultIndexFormat()); }
/** Add an IndexReader. */
public void add(IndexReader reader) throws IOException {
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentReader.java (revision 433317)
+++ src/java/org/apache/lucene/index/SegmentReader.java (working copy)
@@ -104,18 +104,18 @@
}
}
- protected SegmentReader() { super(null); }
+ protected SegmentReader() { super(null, null); }
- public static SegmentReader get(SegmentInfo si) throws IOException {
- return get(si.dir, si, null, false, false);
+ public static SegmentReader get(SegmentInfo si, IndexFormat indexFormat) throws IOException {
+ return get(si.dir, indexFormat, si, null, false, false);
}
- public static SegmentReader get(SegmentInfos sis, SegmentInfo si,
+ public static SegmentReader get(SegmentInfos sis, IndexFormat indexFormat, SegmentInfo si,
boolean closeDir) throws IOException {
- return get(si.dir, si, sis, closeDir, true);
+ return get(si.dir, indexFormat, si, sis, closeDir, true);
}
- public static SegmentReader get(Directory dir, SegmentInfo si,
+ public static SegmentReader get(Directory dir, IndexFormat indexFormat, SegmentInfo si,
SegmentInfos sis,
boolean closeDir, boolean ownDir)
throws IOException {
@@ -125,7 +125,7 @@
} catch (Exception e) {
throw new RuntimeException("cannot load SegmentReader class: " + e, e);
}
- instance.init(dir, sis, closeDir, ownDir);
+ instance.init(dir, indexFormat, sis, closeDir, ownDir);
instance.initialize(si);
return instance;
}
@@ -142,7 +142,7 @@
// No compound file exists - use the multi-file format
fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
- fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
+ fieldsReader = getIndexFormat().getFieldsReader(cfsDir, segment, fieldInfos);
tis = new TermInfosReader(cfsDir, segment, fieldInfos);
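Note: SegmentReader now obtains its FieldsReader through getIndexFormat() instead of instantiating one directly. The DefaultIndexFormat referenced throughout is presumably just a factory for the pre-existing classes; one plausible body, inferred rather than taken from the patch:

    package org.apache.lucene.index;

    import java.io.IOException;
    import org.apache.lucene.store.Directory;

    // Sketch only: a DefaultIndexFormat that reproduces the stock behavior.
    public class DefaultIndexFormat implements IndexFormat {
      public FieldsWriter getFieldsWriter(Directory dir, String segment, FieldInfos fieldInfos) throws IOException {
        return new FieldsWriter(dir, segment, fieldInfos); // classic stored-fields files (.fdt/.fdx)
      }
      public FieldsReader getFieldsReader(Directory dir, String segment, FieldInfos fieldInfos) throws IOException {
        return new FieldsReader(dir, segment, fieldInfos);
      }
    }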
Index: src/java/org/apache/lucene/store/IndexOutput.java
===================================================================
--- src/java/org/apache/lucene/store/IndexOutput.java (revision 433317)
+++ src/java/org/apache/lucene/store/IndexOutput.java (working copy)
@@ -30,6 +30,10 @@
*/
public abstract void writeByte(byte b) throws IOException;
+ /** Copies a single byte from the given input.
+ * @see IndexInput#readByte()
+ */
+ public void writeByte(IndexInput in) throws IOException {
+ writeByte(in.readByte());
+ }
+
/** Writes an array of bytes.
* @param b the bytes to write
* @param length the number of bytes to write
@@ -37,6 +41,12 @@
*/
public abstract void writeBytes(byte[] b, int length) throws IOException;
+ /** Copies bytes from the given input.
+ * @param in the source of the bytes
+ * @param length the number of bytes to copy
+ */
+ public void writeBytes(IndexInput in, long length) throws IOException {
+ while (length-- > 0) {
+ writeByte(in.readByte());
+ }
+ }
+
/** Writes an int as four bytes.
* @see IndexInput#readInt()
*/
@@ -47,6 +57,16 @@
writeByte((byte) i);
}
+ /** Copies an int (four bytes) from the given input.
+ * @see IndexInput#readInt()
+ */
+ public void writeInt(IndexInput in) throws IOException {
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ }
+
/** Writes an int in a variable-length format. Writes between one and
* five bytes. Smaller values take fewer bytes. Negative numbers are not
* supported.
@@ -60,6 +80,20 @@
writeByte((byte)i);
}
+ /** Copies a variable-length int from the given input. Writes between one
+ * and five bytes. Smaller values take fewer bytes. Negative numbers are
+ * not supported.
+ * @see IndexInput#readVInt()
+ */
+ public void writeVInt(IndexInput in) throws IOException {
+ byte b = in.readByte();
+ writeByte(b);
+ while ((b & 0x80) != 0) {
+ b = in.readByte();
+ writeByte(b);
+ }
+ }
+
/** Writes a long as eight bytes.
* @see IndexInput#readLong()
*/
@@ -68,6 +102,20 @@
writeInt((int) i);
}
+ /** Copies a long (eight bytes) from the given input.
+ * @see IndexInput#readLong()
+ */
+ public void writeLong(IndexInput in) throws IOException {
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ }
+
   /** Writes a long in a variable-length format. Writes between one and nine
    * bytes. Smaller values take fewer bytes. Negative numbers are not
    * supported.
@@ -81,6 +129,20 @@
writeByte((byte)i);
}
+ /** Copies a variable-length long from the given input. Writes between one
+ * and nine bytes. Smaller values take fewer bytes. Negative numbers are
+ * not supported.
+ * @see IndexInput#readVLong()
+ */
+ public void writeVLong(IndexInput in) throws IOException {
+ byte b = in.readByte();
+ writeByte(b);
+ while ((b & 0x80) != 0) {
+ b = in.readByte();
+ writeByte(b);
+ }
+ }
+
/** Writes a string.
* @see IndexInput#readString()
*/
@@ -90,6 +152,15 @@
writeChars(s, 0, length);
}
+ /** Copies a string from the given input.
+ * @see IndexInput#readString()
+ */
+ public void writeString(IndexInput in) throws IOException {
+ int length = in.readVInt();
+ writeVInt(length);
+ writeChars(in, length);
+ }
+
/** Writes a sequence of UTF-8 encoded characters from a string.
* @param s the source of the characters
* @param start the first character in the sequence
@@ -102,18 +173,38 @@
for (int i = start; i < end; i++) {
final int code = (int)s.charAt(i);
if (code >= 0x01 && code <= 0x7F)
- writeByte((byte)code);
+ writeByte((byte)code);
else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) {
- writeByte((byte)(0xC0 | (code >> 6)));
- writeByte((byte)(0x80 | (code & 0x3F)));
+ writeByte((byte)(0xC0 | (code >> 6)));
+ writeByte((byte)(0x80 | (code & 0x3F)));
} else {
- writeByte((byte)(0xE0 | (code >>> 12)));
- writeByte((byte)(0x80 | ((code >> 6) & 0x3F)));
- writeByte((byte)(0x80 | (code & 0x3F)));
+ writeByte((byte)(0xE0 | (code >>> 12)));
+ writeByte((byte)(0x80 | ((code >> 6) & 0x3F)));
+ writeByte((byte)(0x80 | (code & 0x3F)));
}
}
}
+ /** Copies a sequence of UTF-8 encoded characters from the given input.
+ * @param in the source of the characters
+ * @param length the number of characters in the sequence
+ * @see IndexInput#readChars(char[],int,int)
+ */
+ public void writeChars(IndexInput in, int length)
+ throws IOException {
+ for (int i = 0; i < length; i++) {
+ byte b = in.readByte();
+ writeByte(b);
+ if ((b & 0x80) != 0) {
+ writeByte(in.readByte());
+ if ((b & 0xE0) == 0xE0) {
+ writeByte(in.readByte());
+ }
+ }
+ }
+ }
+
/** Forces any buffered output to be written. */
public abstract void flush() throws IOException;
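Note: the new IndexOutput methods copy values from an IndexInput without decoding them. This works for the variable-length forms because each encoded byte carries seven payload bits and uses the high bit as a continuation flag, so the end of a value can be found without interpreting it; writeChars(IndexInput, int) likewise inspects the UTF-8 lead byte (a set high bit means at least one continuation byte, and a 1110xxxx lead byte means two). A standalone illustration of the continuation-bit rule, independent of any Lucene class:

    // 300 = 0b1_0010_1100 encodes as 0xAC 0x02: the low seven bits first with
    // the high bit set, then the remaining bits with the high bit clear.
    public class VIntCopyDemo {
      public static void main(String[] args) {
        byte[] src = { (byte) 0xAC, 0x02, 0x7F }; // vint 300 followed by vint 127
        int pos = 0;
        byte b;
        do {
          b = src[pos++];          // copy the byte through unchanged
          System.out.printf("%02X ", b);
        } while ((b & 0x80) != 0); // high bit set: more bytes belong to this vint
        System.out.println("<- one whole vint copied (prints AC 02)");
      }
    }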
Index: src/java/org/apache/lucene/document/Field.java
===================================================================
--- src/java/org/apache/lucene/document/Field.java (revision 433317)
+++ src/java/org/apache/lucene/document/Field.java (working copy)
@@ -16,11 +16,14 @@
* limitations under the License.
*/
-import org.apache.lucene.util.Parameter;
-
import java.io.Reader;
import java.io.Serializable;
+import org.apache.lucene.index.DefaultFieldData;
+import org.apache.lucene.index.FieldData;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.util.Parameter;
+
/**
A field is a section of a Document. Each field has two parts, a name and a
value. Values may be free text, provided as a String or as a Reader, or they
@@ -31,6 +34,8 @@
public final class Field extends AbstractField implements Fieldable, Serializable {
/** Specifies whether and how a field should be stored. */
public static final class Store extends Parameter implements Serializable {
@@ -127,22 +132,6 @@
public static final TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");
}
-
- /** The value of the field as a String, or null. If null, the Reader value
- * or binary value is used. Exactly one of stringValue(), readerValue(), and
- * binaryValue() must be set. */
- public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }
-
- /** The value of the field as a Reader, or null. If null, the String value
- * or binary value is used. Exactly one of stringValue(), readerValue(),
- * and binaryValue() must be set. */
- public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }
-
- /** The value of the field in Binary, or null. If null, the Reader or
- * String value is used. Exactly one of stringValue(), readerValue() and
- * binaryValue() must be set. */
- public byte[] binaryValue() { return fieldsData instanceof byte[] ? (byte[])fieldsData : null; }
-
/**
* Create a field by specifying its name, value and how it will
* be saved in the index. Term vectors will not be stored in the index.
@@ -177,57 +166,19 @@
* </ul>
*/
public Field(String name, String value, Store store, Index index, TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- if (value == null)
- throw new NullPointerException("value cannot be null");
+ super(name, new DefaultFieldData(value), store, index, termVector);
if (name.length() == 0 && value.length() == 0)
throw new IllegalArgumentException("name and value cannot both be empty");
- if (index == Index.NO && store == Store.NO)
- throw new IllegalArgumentException("it doesn't make sense to have a field that "
- + "is neither indexed nor stored");
- if (index == Index.NO && termVector != TermVector.NO)
- throw new IllegalArgumentException("cannot store term vector information "
- + "for a field that is not indexed");
-
- this.name = name.intern(); // field names are interned
- this.fieldsData = value;
+ }
- if (store == Store.YES){
- this.isStored = true;
- this.isCompressed = false;
- }
- else if (store == Store.COMPRESS) {
- this.isStored = true;
- this.isCompressed = true;
- }
- else if (store == Store.NO){
- this.isStored = false;
- this.isCompressed = false;
- }
- else
- throw new IllegalArgumentException("unknown store parameter " + store);
-
- if (index == Index.NO) {
- this.isIndexed = false;
- this.isTokenized = false;
- } else if (index == Index.TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = true;
- } else if (index == Index.UN_TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = false;
- } else if (index == Index.NO_NORMS) {
- this.isIndexed = true;
- this.isTokenized = false;
- this.omitNorms = true;
- } else {
- throw new IllegalArgumentException("unknown index parameter " + index);
- }
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
+ /**
+ * Constructor used only when reading the index.
+ *
+ * @param fi the FieldInfo describing this field
+ * @param data the field data read from the index
+ */
+ public Field(FieldInfo fi, FieldData data) {
+ super(fi, data);
}
/**
@@ -252,23 +203,7 @@
* @throws NullPointerException if name or reader is <code>null</code>
*/
public Field(String name, Reader reader, TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- if (reader == null)
- throw new NullPointerException("reader cannot be null");
-
- this.name = name.intern(); // field names are interned
- this.fieldsData = reader;
-
- this.isStored = false;
- this.isCompressed = false;
-
- this.isIndexed = true;
- this.isTokenized = true;
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
+ super(name, new DefaultFieldData(reader), Store.NO, Index.TOKENIZED, termVector);
}
/**
@@ -280,34 +215,84 @@
* @throws IllegalArgumentException if store is <code>Store.NO</code>
*/
public Field(String name, byte[] value, Store store) {
- if (name == null)
- throw new IllegalArgumentException("name cannot be null");
- if (value == null)
- throw new IllegalArgumentException("value cannot be null");
-
- this.name = name.intern();
- this.fieldsData = value;
-
- if (store == Store.YES){
- this.isStored = true;
- this.isCompressed = false;
- }
- else if (store == Store.COMPRESS) {
- this.isStored = true;
- this.isCompressed = true;
- }
- else if (store == Store.NO)
- throw new IllegalArgumentException("binary values can't be unstored");
- else
+ super(name, new DefaultFieldData(value), store, Index.NO, TermVector.NO);
+ }
+
+ protected void setStore(Field.Store store) {
+ // fieldData is the FieldData held by AbstractField; the compressed flag
+ // is assumed to live on DefaultFieldData, hence the cast
+ DefaultFieldData data = (DefaultFieldData) fieldData;
+ if (store == Field.Store.YES) {
+ data.setStored(true);
+ data.setCompressed(false);
+ } else if (store == Field.Store.COMPRESS) {
+ data.setStored(true);
+ data.setCompressed(true);
+ } else if (store == Field.Store.NO) {
+ if (isBinary()) {
+ throw new IllegalArgumentException("binary values can't be unstored");
+ }
+ data.setStored(false);
+ data.setCompressed(false);
+ } else {
throw new IllegalArgumentException("unknown store parameter " + store);
-
- this.isIndexed = false;
- this.isTokenized = false;
-
- this.isBinary = true;
-
- setStoreTermVector(TermVector.NO);
+ }
}
+ /** True if the value of the field is stored and compressed within the index */
+ public final boolean isCompressed() { return ((DefaultFieldData) fieldData).isCompressed(); }
+ /** Prints a Field for human consumption. */
+ public String toString() {
+ StringBuffer result = new StringBuffer();
+ if (isStored()) {
+ result.append("stored");
+ if (isCompressed())
+ result.append("/compressed");
+ else
+ result.append("/uncompressed");
+ }
+ if (isIndexed) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("indexed");
+ }
+ if (isTokenized()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("tokenized");
+ }
+ if (storeTermVector) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVector");
+ }
+ if (storeOffsetWithTermVector) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVectorOffsets");
+ }
+ if (storePositionWithTermVector) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVectorPosition");
+ }
+ if (isBinary()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("binary");
+ }
+ if (omitNorms) {
+ result.append(",omitNorms");
+ }
+ if (isLazy()) {
+ result.append(",lazy");
+ }
+ result.append('<');
+ result.append(name);
+ result.append(':');
+
+ result.append(fieldData);
+
+ result.append('>');
+ return result.toString();
+ }
+
}
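Note: the public Field constructors keep their signatures, so existing indexing code compiles unchanged; values are simply wrapped in a DefaultFieldData internally. A routine usage fragment (nothing here is new API):

    Document doc = new Document();
    doc.add(new Field("title", "Lucene in Action", Field.Store.YES, Field.Index.TOKENIZED));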
Index: src/java/org/apache/lucene/document/AbstractField.java
===================================================================
--- src/java/org/apache/lucene/document/AbstractField.java (revision 433317)
+++ src/java/org/apache/lucene/document/AbstractField.java (working copy)
@@ -15,7 +15,11 @@
* limitations under the License.
*/
+import java.io.Reader;
+import org.apache.lucene.index.FieldData;
+import org.apache.lucene.index.FieldInfo;
+
/**
*
*
@@ -27,60 +31,46 @@
protected boolean storeOffsetWithTermVector = false;
protected boolean storePositionWithTermVector = false;
protected boolean omitNorms = false;
- protected boolean isStored = false;
protected boolean isIndexed = true;
- protected boolean isTokenized = true;
- protected boolean isBinary = false;
- protected boolean isCompressed = false;
- protected boolean lazy = false;
protected float boost = 1.0f;
// the one and only data object for all different kind of field values
- protected Object fieldsData = null;
+ protected FieldData fieldData = null;
- protected AbstractField()
- {
-
+ /**
+ * Constructor used only when reading the index.
+ *
+ * @param fi the FieldInfo describing this field
+ * @param data the field data read from the index
+ */
+ protected AbstractField(FieldInfo fi, FieldData data) {
+ name = fi.getName().intern();
+ isIndexed = fi.isIndexed();
+ omitNorms = fi.omitNorms();
+ storeTermVector = fi.storeTermVector();
+ storeOffsetWithTermVector = fi.storeOffsetWithTermVector();
+ storePositionWithTermVector = fi.storePositionWithTermVector();
+ fieldData = data;
}
- protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
+ protected AbstractField(String name, FieldData data, Field.Store store, Field.Index index, Field.TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
+ if (data == null)
+ throw new NullPointerException("data cannot be null");
+
this.name = name.intern(); // field names are interned
- if (store == Field.Store.YES){
- this.isStored = true;
- this.isCompressed = false;
+ fieldData = data;
+
+ if (index == Field.Index.NO && store == Field.Store.NO) {
+ throw new IllegalArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
}
- else if (store == Field.Store.COMPRESS) {
- this.isStored = true;
- this.isCompressed = true;
+ if (index == Field.Index.NO && termVector != Field.TermVector.NO) {
+ throw new IllegalArgumentException("cannot store term vector information " + "for a field that is not indexed");
}
- else if (store == Field.Store.NO){
- this.isStored = false;
- this.isCompressed = false;
- }
- else
- throw new IllegalArgumentException("unknown store parameter " + store);
- if (index == Field.Index.NO) {
- this.isIndexed = false;
- this.isTokenized = false;
- } else if (index == Field.Index.TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = true;
- } else if (index == Field.Index.UN_TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = false;
- } else if (index == Field.Index.NO_NORMS) {
- this.isIndexed = true;
- this.isTokenized = false;
- this.omitNorms = true;
- } else {
- throw new IllegalArgumentException("unknown index parameter " + index);
- }
-
- this.isBinary = false;
-
+ setStore(store);
+ setIndex(index);
setStoreTermVector(termVector);
}
@@ -155,10 +145,42 @@
}
}
+ protected void setIndex(Field.Index index) {
+ if (index == Field.Index.NO) {
+ isIndexed = false;
+ fieldData.setTokenized(false);
+ } else if (index == Field.Index.TOKENIZED) {
+ isIndexed = true;
+ fieldData.setTokenized(true);
+ } else if (index == Field.Index.UN_TOKENIZED) {
+ isIndexed = true;
+ fieldData.setTokenized(false);
+ } else if (index == Field.Index.NO_NORMS) {
+ isIndexed = true;
+ fieldData.setTokenized(false);
+ omitNorms = true;
+ } else {
+ throw new IllegalArgumentException("unknown index parameter " + index);
+ }
+ }
+
+ protected void setStore(Field.Store store) {
+ if (store == Field.Store.YES) {
+ fieldData.setStored(true);
+ } else if (store == Field.Store.NO) {
+ if (isBinary()) {
+ throw new IllegalArgumentException("binary values can't be unstored");
+ }
+ fieldData.setStored(false);
+ } else {
+ throw new IllegalArgumentException("unknown store parameter " + store);
+ }
+ }
+
/** True iff the value of the field is to be stored in the index for return
with search hits. It is an error for this to be true if a field is
Reader-valued. */
- public final boolean isStored() { return isStored; }
+ public final boolean isStored() { return fieldData.isStored(); }
/** True iff the value of the field is to be indexed, so that it may be
searched on. */
@@ -167,11 +189,8 @@
/** True iff the value of the field should be tokenized as text prior to
indexing. Un-tokenized fields are indexed as a single word and may not be
Reader-valued. */
- public final boolean isTokenized() { return isTokenized; }
+ public final boolean isTokenized() { return fieldData.isTokenized(); }
- /** True if the value of the field is stored and compressed within the index */
- public final boolean isCompressed() { return isCompressed; }
-
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
@@ -198,7 +217,7 @@
}
/** True iff the value of the field is stored as binary */
- public final boolean isBinary() { return isBinary; }
+ public final boolean isBinary() { return fieldData.isBinary(); }
/** True if norms are omitted for this indexed field */
public boolean getOmitNorms() { return omitNorms; }
@@ -211,25 +230,25 @@
public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }
public boolean isLazy() {
- return lazy;
+ return fieldData.isLazy();
}
+ /** Returns the FieldData object holding this field's value and flags. */
+ public FieldData getData() {
+ return fieldData;
+ }
+
/** Prints a Field for human consumption. */
- public final String toString() {
+ public String toString() {
StringBuffer result = new StringBuffer();
- if (isStored) {
+ if (isStored()) {
result.append("stored");
- if (isCompressed)
- result.append("/compressed");
- else
- result.append("/uncompressed");
}
if (isIndexed) {
if (result.length() > 0)
result.append(",");
result.append("indexed");
}
- if (isTokenized) {
+ if (isTokenized()) {
if (result.length() > 0)
result.append(",");
result.append("tokenized");
@@ -249,7 +268,7 @@
result.append(",");
result.append("termVectorPosition");
}
- if (isBinary) {
+ if (isBinary()) {
if (result.length() > 0)
result.append(",");
result.append("binary");
@@ -257,18 +276,28 @@
if (omitNorms) {
result.append(",omitNorms");
}
- if (lazy){
+ if (isLazy()){
result.append(",lazy");
}
result.append('<');
result.append(name);
result.append(':');
- if (fieldsData != null && lazy == false) {
- result.append(fieldsData);
- }
+ result.append(fieldData);
result.append('>');
return result.toString();
}
+
+ public String stringValue() {
+ return fieldData.stringValue();
+ }
+
+ public Reader readerValue() {
+ return fieldData.readerValue();
+ }
+
+ public byte[] binaryValue() {
+ return fieldData.binaryValue();
+ }
}
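Note: like IndexFormat, the FieldData interface is not included in this excerpt. Collecting every call the patch makes on it in AbstractField and Field yields the minimal sketch below; the compression accessors appear only on DefaultFieldData, so they are left out (all of this is inferred, not confirmed by the patch):

    package org.apache.lucene.index;

    import java.io.Reader;

    // Sketch only: the union of calls made on FieldData in this patch.
    public interface FieldData {
      boolean isStored();     void setStored(boolean stored);
      boolean isTokenized();  void setTokenized(boolean tokenized);
      boolean isBinary();     // true when the value is a byte[]
      boolean isLazy();       // true when the value is loaded on demand
      String stringValue();   // exactly one of the three value forms is non-null
      Reader readerValue();
      byte[] binaryValue();
    }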
Index: src/java/org/apache/lucene/document/Fieldable.java
===================================================================
--- src/java/org/apache/lucene/document/Fieldable.java (revision 433317)
+++ src/java/org/apache/lucene/document/Fieldable.java (working copy)
@@ -19,6 +19,8 @@
import java.io.Reader;
import java.io.Serializable;
+import org.apache.lucene.index.FieldData;
+
/**
* Synonymous with {@link Field}.
*
@@ -90,7 +92,7 @@
boolean isTokenized();
/** True if the value of the field is stored and compressed within the index */
- boolean isCompressed();
+ //boolean isCompressed();
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
@@ -134,4 +136,6 @@
* @return true if this field can be loaded lazily
*/
boolean isLazy();
+
+ /** Returns the FieldData object holding this field's value and flags. */
+ FieldData getData();
}