blob: e48ad27108c75126fd8086ef1c7fe349015615c0 [file] [log] [blame]
Index: lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java
===================================================================
--- lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java (working copy)
@@ -98,12 +98,99 @@
/**
* Sets the given <code>long</code> value and sets the field's {@link ValueType} to
- * {@link ValueType#INTS} unless already set. If you want to change the
+ * {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
*/
public void setInt(long value) {
+ setInt(value, false);
+ }
+
+ /**
+ * Sets the given <code>long</code> value as a 64 bit signed integer.
+ * The <code>fixed</code> flag only takes effect while no {@link ValueType} has been set.
+ * @param value
+ * the value to set
+ * @param fixed
+ * if <code>true</code> {@link ValueType#FIXED_INTS_64} is used
+ * otherwise {@link ValueType#VAR_INTS}
+ */
+ public void setInt(long value, boolean fixed) {
+ if (type == null) {
+ type = fixed ? ValueType.FIXED_INTS_64 : ValueType.VAR_INTS;
+ }
+ longValue = value;
+ }
+
+ /**
+ * Sets the given <code>int</code> value and sets the field's {@link ValueType} to
+ * {@link ValueType#VAR_INTS} unless already set. If you want to change the
+ * default type use {@link #setType(ValueType)}.
+ */
+ public void setInt(int value) {
+ setInt(value, false);
+ }
+
+ /**
+ * Sets the given <code>int</code> value as a 32 bit signed integer.
+ * The <code>fixed</code> flag only takes effect while no {@link ValueType} has been set.
+ * @param value
+ * the value to set
+ * @param fixed
+ * if <code>true</code> {@link ValueType#FIXED_INTS_32} is used
+ * otherwise {@link ValueType#VAR_INTS}
+ */
+ public void setInt(int value, boolean fixed) {
+ if (type == null) {
+ type = fixed ? ValueType.FIXED_INTS_32 : ValueType.VAR_INTS;
+ }
+ longValue = value;
+ }
+
+ /**
+ * Sets the given <code>short</code> value and sets the field's {@link ValueType} to
+ * {@link ValueType#VAR_INTS} unless already set. If you want to change the
+ * default type use {@link #setType(ValueType)}.
+ */
+ public void setInt(short value) {
+ setInt(value, false);
+ }
+
+ /**
+ * Sets the given <code>short</code> value as a 16 bit signed integer.
+ * The <code>fixed</code> flag only takes effect while no {@link ValueType} has been set.
+ * @param value
+ * the value to set
+ * @param fixed
+ * if <code>true</code> {@link ValueType#FIXED_INTS_16} is used
+ * otherwise {@link ValueType#VAR_INTS}
+ */
+ public void setInt(short value, boolean fixed) {
if (type == null) {
- type = ValueType.INTS;
+ type = fixed ? ValueType.FIXED_INTS_16 : ValueType.VAR_INTS;
+ }
+ longValue = value;
+ }
+
+ /**
+ * Sets the given <code>byte</code> value and sets the field's {@link ValueType} to
+ * {@link ValueType#VAR_INTS} unless already set. If you want to change the
+ * default type use {@link #setType(ValueType)}.
+ */
+ public void setInt(byte value) {
+ setInt(value, false);
+ }
+ /**
+ * Sets the given <code>byte</code> value as an 8 bit signed integer.
+ * The <code>fixed</code> flag only takes effect while no {@link ValueType} has been set.
+ * @param value
+ * the value to set
+ * @param fixed
+ * if <code>true</code> {@link ValueType#FIXED_INTS_8} is used
+ * otherwise {@link ValueType#VAR_INTS}
+ */
+ public void setInt(byte value, boolean fixed) {
+ if (type == null) {
+ type = fixed ? ValueType.FIXED_INTS_8 : ValueType.VAR_INTS;
}
longValue = value;
}
@@ -268,7 +355,7 @@
field.stringValue());
valField.setBytes(ref, type);
break;
- case INTS:
+ case VAR_INTS:
valField.setInt(Long.parseLong(field.stringValue()));
break;
case FLOAT_32:
Index: lucene/src/java/org/apache/lucene/index/CheckIndex.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/CheckIndex.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/index/CheckIndex.java (working copy)
@@ -1025,7 +1025,11 @@
case FLOAT_64:
values.getFloat();
break;
- case INTS:
+ case VAR_INTS:
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
values.getInt();
break;
default:
Index: lucene/src/java/org/apache/lucene/index/FieldInfos.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/FieldInfos.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/index/FieldInfos.java (working copy)
@@ -609,7 +609,7 @@
b = 0;
} else {
switch(fi.docValues) {
- case INTS:
+ case VAR_INTS:
b = 1;
break;
case FLOAT_32:
@@ -636,6 +636,19 @@
case BYTES_VAR_SORTED:
b = 9;
break;
+ case FIXED_INTS_16:
+ b = 10;
+ break;
+ case FIXED_INTS_32:
+ b = 11;
+ break;
+ case FIXED_INTS_64:
+ b = 12;
+ break;
+ case FIXED_INTS_8:
+ b = 13;
+ break;
+
default:
throw new IllegalStateException("unhandled indexValues type " + fi.docValues);
}
@@ -686,7 +699,7 @@
docValuesType = null;
break;
case 1:
- docValuesType = ValueType.INTS;
+ docValuesType = ValueType.VAR_INTS;
break;
case 2:
docValuesType = ValueType.FLOAT_32;
@@ -712,6 +725,19 @@
case 9:
docValuesType = ValueType.BYTES_VAR_SORTED;
break;
+ case 10:
+ docValuesType = ValueType.FIXED_INTS_16;
+ break;
+ case 11:
+ docValuesType = ValueType.FIXED_INTS_32;
+ break;
+ case 12:
+ docValuesType = ValueType.FIXED_INTS_64;
+ break;
+ case 13:
+ docValuesType = ValueType.FIXED_INTS_8;
+ break;
+
default:
throw new IllegalStateException("unhandled indexValues type " + b);
}
Index: lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java (working copy)
@@ -80,12 +80,17 @@
case BYTES_FIXED_STRAIGHT:
case FLOAT_32:
case FLOAT_64:
- case INTS:
+ case VAR_INTS:
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
files.add(IndexFileNames.segmentFileName(filename, "",
Writer.DATA_EXTENSION));
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
Writer.DATA_EXTENSION));
break;
+
default:
assert false;
}
Index: lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java (working copy)
@@ -121,8 +121,12 @@
protected IndexDocValues loadDocValues(int docCount, Directory dir, String id,
ValueType type) throws IOException {
switch (type) {
- case INTS:
- return Ints.getValues(dir, id, false);
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
+ case VAR_INTS:
+ return Ints.getValues(dir, id);
case FLOAT_32:
return Floats.getValues(dir, id, docCount);
case FLOAT_64:
Index: lucene/src/java/org/apache/lucene/index/values/Floats.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/values/Floats.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/index/values/Floats.java (working copy)
@@ -131,8 +131,9 @@
lastDocId += fillDefault(docBase - lastDocId - 1);
}
lastDocId += reader.transferTo(datOut);
- } else
+ } else {
super.merge(state);
+ }
}
@Override
@@ -290,7 +291,7 @@
}
}
- private class Source4 extends Source {
+ private final class Source4 extends Source {
private final float[] values;
Source4(final float[] values ) throws IOException {
@@ -317,12 +318,22 @@
}
@Override
+ public Object getArray() {
+ return this.values;
+ }
+
+ @Override
+ public boolean hasArray() {
+ return true;
+ }
+
+ @Override
public ValueType type() {
return ValueType.FLOAT_32;
}
}
- private class Source8 extends Source {
+ private final class Source8 extends Source {
private final double[] values;
Source8(final double[] values) throws IOException {
@@ -352,6 +363,16 @@
public ValueType type() {
return ValueType.FLOAT_64;
}
+
+ @Override
+ public Object getArray() {
+ return this.values;
+ }
+
+ @Override
+ public boolean hasArray() {
+ return true;
+ }
}
@Override
Index: lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java (working copy)
@@ -254,6 +254,25 @@
*/
public abstract ValuesEnum getEnum(AttributeSource attrSource)
throws IOException;
+
+ /**
+ * Returns <code>true</code> iff this {@link Source} exposes an array via
+ * {@link #getArray()} otherwise <code>false</code>.
+ *
+ * @return <code>true</code> iff this {@link Source} exposes an array via
+ * {@link #getArray()} otherwise <code>false</code>.
+ */
+ public boolean hasArray() {
+ return false;
+ }
+
+ /**
+ * Returns the internal array representation iff this {@link Source} uses an
+ * array as its inner representation, otherwise <code>null</code>.
+ */
+ public Object getArray() {
+ return null;
+ }
}
/**
Index: lucene/src/java/org/apache/lucene/index/values/IndexDocValuesArray.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/values/IndexDocValuesArray.java (revision 0)
+++ lucene/src/java/org/apache/lucene/index/values/IndexDocValuesArray.java (revision 0)
@@ -0,0 +1,470 @@
+package org.apache.lucene.index.values;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.values.IndexDocValues.Source;
+import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+/**
+ * @lucene.experimental
+ */
+abstract class IndexDocValuesArray extends Source {
+
+ private final AtomicLong bytesUsed;
+ private final int bytesPerValue;
+ private int size = 0;
+ private final ValueType type;
+ protected int maxDocID = -1;
+
+ IndexDocValuesArray(AtomicLong bytesUsed, int bytesPerValue, ValueType type) {
+ this.bytesUsed = bytesUsed;
+ this.bytesPerValue = bytesPerValue;
+ this.type = type;
+ }
+
+ void set(int docId, long value) { // stores value for docId, growing the backing array on demand
+ if (docId >= size) { // accounted size does not cover this doc yet
+ adjustSize(grow(docId + 1)); // grow() returns the new array length, which becomes the accounted size
+ }
+ if (docId > maxDocID) {
+ maxDocID = docId; // highest doc seen so far; getValueCount() reports maxDocID + 1
+ }
+ setInternal(docId, value);
+ }
+
+ protected final void adjustSize(int newSize) { // records newSize and reports the byte delta (may be negative) to bytesUsed
+ bytesUsed.addAndGet((long) bytesPerValue * (newSize - size)); // multiply as long: bytesPerValue * a large slot delta overflows int
+ size = newSize;
+ }
+
+ void clear() { // resets the size accounting and maxDocID; the subclasses in this file also replace their array
+ adjustSize(0); // subtracts the currently accounted bytes from bytesUsed and sets size to 0
+ maxDocID = -1;
+ size = 0; // redundant: adjustSize(0) has already reset size
+ }
+
+ protected abstract void setInternal(int docId, long value);
+
+ protected abstract int grow(int numDocs);
+
+ abstract void write(IndexOutput output, int numDocs) throws IOException;
+
+ @Override
+ public final int getValueCount() {
+ return maxDocID + 1;
+ }
+
+ @Override
+ public final ValueType type() {
+ return type;
+ }
+
+ @Override
+ public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ return new SourceEnum(attrSource, type(), this, maxDocID + 1) {
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= numDocs) {
+ return pos = NO_MORE_DOCS;
+ }
+ intsRef.ints[intsRef.offset] = IndexDocValuesArray.this.getInt(target);
+ return pos = target;
+ }
+ };
+ }
+
+ abstract ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input)
+ throws IOException;
+
+ @Override
+ public final boolean hasArray() {
+ return true;
+ }
+
+ final static class ByteValues extends IndexDocValuesArray {
+ private byte[] values;
+
+ ByteValues(AtomicLong bytesUsed) {
+ super(bytesUsed, 1, ValueType.FIXED_INTS_8);
+ values = new byte[0];
+ }
+
+ ByteValues(IndexInput input) throws IOException {
+ super(new AtomicLong(), 1, ValueType.FIXED_INTS_8);
+ final int numDocs = input.readInt();
+ values = new byte[numDocs];
+ adjustSize(numDocs);
+ input.readBytes(values, 0, values.length, false);
+ maxDocID = numDocs - 1;
+ }
+
+ @Override
+ public byte[] getArray() {
+ return values;
+ }
+
+ @Override
+ public long getInt(int docID) {
+ assert docID >= 0 && docID < values.length;
+ return values[docID];
+ }
+
+ @Override
+ protected void setInternal(int docId, long value) {
+ values[docId] = (byte) (0xFFL & value);
+ }
+
+ @Override
+ protected int grow(int numDocs) {
+ values = ArrayUtil.grow(values, numDocs);
+ return values.length;
+ }
+
+ @Override
+ void write(IndexOutput output, int numDocs) throws IOException {
+ assert maxDocID + 1 <= numDocs;
+ output.writeInt(numDocs);
+ output.writeBytes(values, 0, maxDocID + 1);
+ final byte zero = 0;
+ for (int i = maxDocID + 1; i < numDocs; i++) {
+ output.writeByte(zero);
+ }
+ }
+
+ @Override
+ ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input)
+ throws IOException {
+ return new FixedIntsEnumImpl(attrSource, input, type()) {
+ @Override
+ protected void fillNext(LongsRef ref, IndexInput dataIn)
+ throws IOException {
+ ref.ints[ref.offset] = dataIn.readByte();
+ }
+ };
+ }
+
+ @Override
+ void clear() {
+ super.clear();
+ values = new byte[0];
+ }
+ };
+
+ final static class ShortValues extends IndexDocValuesArray {
+ private short[] values;
+
+ ShortValues(AtomicLong bytesUsed) {
+ super(bytesUsed, RamUsageEstimator.NUM_BYTES_SHORT,
+ ValueType.FIXED_INTS_16);
+ values = new short[0];
+ }
+
+ ShortValues(IndexInput input) throws IOException {
+ super(new AtomicLong(), RamUsageEstimator.NUM_BYTES_SHORT,
+ ValueType.FIXED_INTS_16);
+ final int numDocs = input.readInt();
+ values = new short[numDocs];
+ adjustSize(numDocs);
+ for (int i = 0; i < values.length; i++) {
+ values[i] = input.readShort();
+ }
+ maxDocID = numDocs - 1;
+ }
+
+ @Override
+ public short[] getArray() {
+ return values;
+ }
+
+ @Override
+ public long getInt(int docID) {
+ assert docID >= 0 && docID < values.length;
+ return values[docID];
+ }
+
+ @Override
+ protected void setInternal(int docId, long value) {
+ values[docId] = (short) (0xFFFF & value);
+ }
+
+ @Override
+ protected int grow(int numDocs) {
+ values = ArrayUtil.grow(values, numDocs);
+ return values.length;
+ }
+
+ @Override
+ void write(IndexOutput output, int numDocs) throws IOException {
+ assert maxDocID + 1 <= numDocs;
+ output.writeInt(numDocs);
+ for (int i = 0; i < maxDocID + 1; i++) {
+ output.writeShort(values[i]);
+ }
+ final short zero = 0;
+ for (int i = maxDocID + 1; i < numDocs; i++) {
+ output.writeShort(zero);
+ }
+ }
+
+ @Override
+ ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input)
+ throws IOException {
+ return new FixedIntsEnumImpl(attrSource, input, type()) {
+ @Override
+ protected void fillNext(LongsRef ref, IndexInput dataIn)
+ throws IOException {
+ ref.ints[ref.offset] = dataIn.readShort();
+ }
+ };
+ }
+
+ @Override
+ void clear() {
+ super.clear();
+ values = new short[0];
+ }
+
+ };
+
+ final static class IntValues extends IndexDocValuesArray {
+ private int[] values;
+
+ IntValues(AtomicLong bytesUsed) {
+ super(bytesUsed, RamUsageEstimator.NUM_BYTES_INT, ValueType.FIXED_INTS_32);
+ values = new int[0];
+ }
+
+ IntValues(IndexInput input) throws IOException {
+ super(new AtomicLong(), RamUsageEstimator.NUM_BYTES_INT,
+ ValueType.FIXED_INTS_32);
+ final int numDocs = input.readInt();
+ values = new int[numDocs];
+ adjustSize(numDocs);
+ for (int i = 0; i < values.length; i++) {
+ values[i] = input.readInt();
+ }
+ maxDocID = numDocs - 1;
+ }
+
+ @Override
+ public int[] getArray() {
+ return values;
+ }
+
+ @Override
+ public long getInt(int docID) {
+ assert docID >= 0 && docID < values.length;
+ return values[docID]; // widened with sign extension; masking with the int literal 0xFFFFFFFF (-1) changes nothing
+ }
+
+ @Override
+ protected void setInternal(int docId, long value) {
+ values[docId] = (int) value; // narrowing cast keeps the low 32 bits, same result as the masked form
+ }
+
+ @Override
+ protected int grow(int numDocs) {
+ values = ArrayUtil.grow(values, numDocs);
+ return values.length;
+ }
+
+ @Override
+ void write(IndexOutput output, int numDocs) throws IOException {
+ assert maxDocID + 1 <= numDocs;
+ output.writeInt(numDocs);
+ for (int i = 0; i < maxDocID + 1; i++) {
+ output.writeInt(values[i]);
+ }
+ for (int i = maxDocID + 1; i < numDocs; i++) {
+ output.writeInt(0);
+ }
+ }
+
+ @Override
+ ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input)
+ throws IOException {
+ return new FixedIntsEnumImpl(attrSource, input, type()) {
+ @Override
+ protected void fillNext(LongsRef ref, IndexInput dataIn)
+ throws IOException {
+ ref.ints[ref.offset] = dataIn.readInt();
+ }
+ };
+ }
+
+ @Override
+ void clear() {
+ super.clear();
+ values = new int[0];
+ }
+ };
+
+ final static class LongValues extends IndexDocValuesArray {
+ private long[] values;
+
+ LongValues(AtomicLong bytesUsed) {
+ super(bytesUsed, RamUsageEstimator.NUM_BYTES_LONG,
+ ValueType.FIXED_INTS_64);
+ values = new long[0];
+ }
+
+ LongValues(IndexInput input) throws IOException {
+ super(new AtomicLong(), RamUsageEstimator.NUM_BYTES_LONG,
+ ValueType.FIXED_INTS_64);
+ final int numDocs = input.readInt();
+ values = new long[numDocs];
+ adjustSize(numDocs);
+ for (int i = 0; i < values.length; i++) {
+ values[i] = input.readLong();
+ }
+ maxDocID = numDocs - 1;
+ }
+
+ @Override
+ public long[] getArray() {
+ return values;
+ }
+
+ @Override
+ public long getInt(int docID) {
+ assert docID >= 0 && docID < values.length;
+ return values[docID];
+ }
+
+ @Override
+ protected void setInternal(int docId, long value) {
+ values[docId] = value;
+ }
+
+ @Override
+ protected int grow(int numDocs) {
+ values = ArrayUtil.grow(values, numDocs);
+ return values.length;
+ }
+
+ @Override
+ void write(IndexOutput output, int numDocs) throws IOException {
+ assert maxDocID + 1 <= numDocs;
+ output.writeInt(numDocs);
+ for (int i = 0; i < maxDocID + 1; i++) {
+ output.writeLong(values[i]);
+ }
+
+ for (int i = maxDocID + 1; i < numDocs; i++) {
+ output.writeLong(0l);
+ }
+ }
+
+ @Override
+ ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input)
+ throws IOException {
+ return new FixedIntsEnumImpl(attrSource, input, type()) {
+ @Override
+ protected void fillNext(LongsRef ref, IndexInput dataIn)
+ throws IOException {
+ ref.ints[ref.offset] = dataIn.readLong();
+ }
+ };
+ }
+
+ @Override
+ void clear() {
+ super.clear();
+ values = new long[0];
+ }
+ };
+
+ private abstract static class FixedIntsEnumImpl extends ValuesEnum {
+ private final IndexInput dataIn;
+ private final int maxDoc;
+ private final int sizeInByte;
+ private int pos = -1;
+
+ private FixedIntsEnumImpl(AttributeSource source, IndexInput dataIn,
+ ValueType type) throws IOException {
+ super(source, type);
+ switch (type) {
+ case FIXED_INTS_16:
+ sizeInByte = 2;
+ break;
+ case FIXED_INTS_32:
+ sizeInByte = 4;
+ break;
+ case FIXED_INTS_64:
+ sizeInByte = 8;
+ break;
+ case FIXED_INTS_8:
+ sizeInByte = 1;
+ break;
+ default:
+ throw new IllegalStateException("type " + type
+ + " is not a fixed int type");
+ }
+ intsRef.offset = 0;
+ this.dataIn = dataIn;
+ maxDoc = dataIn.readInt();
+
+ }
+
+ @Override
+ public void close() throws IOException {
+ dataIn.close();
+ }
+
+ @Override
+ public int advance(int target) throws IOException { // moves to target, which must lie beyond the current position
+ if (target >= maxDoc) {
+ return pos = NO_MORE_DOCS;
+ }
+ assert target > pos;
+ if (target > pos + 1) { // skip the values in between; the byte offset is computed in long to avoid int overflow
+ dataIn
+ .seek(dataIn.getFilePointer() + ((long) (target - pos - 1) * sizeInByte));
+ }
+ fillNext(intsRef, dataIn);
+ return pos = target;
+ }
+
+ protected abstract void fillNext(LongsRef ref, IndexInput input)
+ throws IOException;
+
+ @Override
+ public int docID() {
+ return pos;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ if (pos >= maxDoc) {
+ return pos = NO_MORE_DOCS;
+ }
+ return advance(pos + 1);
+ }
+ }
+
+}
Property changes on: lucene/src/java/org/apache/lucene/index/values/IndexDocValuesArray.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: lucene/src/java/org/apache/lucene/index/values/Ints.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/values/Ints.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/index/values/Ints.java (working copy)
@@ -33,14 +33,11 @@
private Ints() {
}
- public static Writer getWriter(Directory dir, String id,
- boolean useFixedArray, AtomicLong bytesUsed) throws IOException {
- // TODO - implement fixed?!
- return new IntsWriter(dir, id, bytesUsed);
+ public static Writer getWriter(Directory dir, String id, AtomicLong bytesUsed, ValueType type) throws IOException {
+ return new IntsWriter(dir, id, bytesUsed, type);
}
- public static IndexDocValues getValues(Directory dir, String id,
- boolean useFixedArray) throws IOException {
+ public static IndexDocValues getValues(Directory dir, String id) throws IOException {
return new IntsReader(dir, id);
}
}
Index: lucene/src/java/org/apache/lucene/index/values/IntsImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/values/IntsImpl.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/index/values/IntsImpl.java (working copy)
@@ -21,19 +21,24 @@
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.values.IndexDocValuesArray;
+import org.apache.lucene.index.values.IndexDocValues.Source;
+import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
+import org.apache.lucene.index.values.IndexDocValuesArray.ByteValues;
+import org.apache.lucene.index.values.IndexDocValuesArray.IntValues;
+import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
+import org.apache.lucene.index.values.IndexDocValuesArray.ShortValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongsRef;
-import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
/**
- * Stores ints packed with fixed-bit precision.
+ * Stores ints either packed with fixed-bit precision or as fixed-size 8, 16, 32 or 64 bit values.
*
* @lucene.experimental
* */
@@ -41,45 +46,59 @@
private static final String CODEC_NAME = "Ints";
private static final byte PACKED = 0x00;
- private static final byte FIXED = 0x01;
-
+ private static final byte FIXED_64 = 0x01;
+ private static final byte FIXED_32 = 0x02;
+ private static final byte FIXED_16 = 0x03;
+ private static final byte FIXED_8 = 0x04;
+
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static class IntsWriter extends Writer {
- // TODO: can we bulkcopy this on a merge?
+ // TODO: optimize merging here!!
private LongsRef intsRef;
- private long[] docToValue;
+ private final IndexDocValuesArray array;
private long minValue;
private long maxValue;
private boolean started;
private final String id;
private int lastDocId = -1;
- private IndexOutput datOut;
+ private final Directory dir;
+ private final byte typeOrd;
+
- protected IntsWriter(Directory dir, String id, AtomicLong bytesUsed)
- throws IOException {
+ protected IntsWriter(Directory dir, String id, AtomicLong bytesUsed,
+ ValueType valueType) throws IOException {
super(bytesUsed);
- datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
- DATA_EXTENSION));
- boolean success = false;
- try {
- CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
- this.id = id;
- docToValue = new long[1];
- bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG); // TODO the
- // bitset
- // needs memory
- // too
- success = true;
- } finally {
- if (!success) {
- datOut.close();
- }
+ this.dir = dir;
+ this.id = id;
+ switch (valueType) {
+ case FIXED_INTS_16:
+ array= new ShortValues(bytesUsed);
+ typeOrd = FIXED_16;
+ break;
+ case FIXED_INTS_32:
+ array = new IntValues(bytesUsed);
+ typeOrd = FIXED_32;
+ break;
+ case FIXED_INTS_64:
+ array = new LongValues(bytesUsed);
+ typeOrd = FIXED_64;
+ break;
+ case FIXED_INTS_8:
+ array = new ByteValues(bytesUsed);
+ typeOrd = FIXED_8;
+ break;
+ case VAR_INTS:
+ array = new LongValues(bytesUsed);
+ typeOrd = PACKED;
+ break;
+ default:
+ throw new IllegalStateException("unknown type " + valueType);
}
}
-
+
@Override
public void add(int docID, long v) throws IOException {
assert lastDocId < docID;
@@ -94,65 +113,60 @@
}
}
lastDocId = docID;
-
- if (docID >= docToValue.length) {
- final long len = docToValue.length;
- docToValue = ArrayUtil.grow(docToValue, 1 + docID);
- bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG
- * ((docToValue.length) - len));
- }
- docToValue[docID] = v;
+ array.set(docID, v);
}
@Override
public void finish(int docCount) throws IOException {
+ IndexOutput datOut = null;
+ boolean success = false;
try {
+ datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
+ DATA_EXTENSION));
+ CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
if (!started) {
minValue = maxValue = 0;
}
- // if we exceed the range of positive longs we must switch to fixed ints
- if ((maxValue - minValue) < (((long)1) << 63) && (maxValue - minValue) >= 0) {
- writePackedInts(docCount);
- } else {
- writeFixedInts(docCount);
+ byte headerType = typeOrd;
+ if (typeOrd == PACKED) {
+ final long delta = maxValue - minValue; // if we exceed the range of positive longs we must switch to fixed ints
+ if (delta <= ( maxValue >= 0 && minValue <= 0 ? Long.MAX_VALUE : Long.MAX_VALUE -1) && delta >= 0) {
+ writePackedInts(datOut, docCount);
+ success = true; // packed data fully written: the early return must not leave closeSafely(!success, ...) in failure mode
+ return;
+ }
+ headerType = FIXED_64;
}
-
+ datOut.writeByte(headerType);
+ array.write(datOut, docCount);
+ success = true;
} finally {
- datOut.close();
- bytesUsed
- .addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG * docToValue.length));
- docToValue = null;
- }
- }
-
- private void writeFixedInts(int docCount) throws IOException {
- datOut.writeByte(FIXED);
- datOut.writeInt(docCount);
- for (int i = 0; i < docToValue.length; i++) {
- datOut.writeLong(docToValue[i]); // write full array - we use 0 as default
- }
- for (int i = docToValue.length; i < docCount; i++) {
- datOut.writeLong(0); // fill with defaults values
+ IOUtils.closeSafely(!success, datOut);
+ array.clear();
}
}
- private void writePackedInts(int docCount) throws IOException {
+ private void writePackedInts(IndexOutput datOut, int docCount) throws IOException {
datOut.writeByte(PACKED);
datOut.writeLong(minValue);
+ assert array.type() == ValueType.FIXED_INTS_64;
+ final long[] docToValue = (long[])array.getArray();
// write a default value to recognize docs without a value for that
// field
- final long defaultValue = maxValue>= 0 && minValue <=0 ? 0-minValue : ++maxValue-minValue;
+ final long defaultValue = maxValue >= 0 && minValue <= 0 ? 0 - minValue
+ : ++maxValue - minValue;
datOut.writeLong(defaultValue);
PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
- PackedInts.bitsRequired(maxValue-minValue));
- final int limit = docToValue.length > docCount ? docCount : docToValue.length;
+ PackedInts.bitsRequired(maxValue - minValue));
+ final int limit = docToValue.length > docCount ? docCount
+ : docToValue.length;
for (int i = 0; i < limit; i++) {
w.add(docToValue[i] == 0 ? defaultValue : docToValue[i] - minValue);
}
for (int i = limit; i < docCount; i++) {
w.add(defaultValue);
}
-
+
w.finish();
}
@@ -183,7 +197,7 @@
*/
static class IntsReader extends IndexDocValues {
private final IndexInput datIn;
- private final boolean packed;
+ private final byte type;
protected IntsReader(Directory dir, String id) throws IOException {
datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
@@ -191,7 +205,7 @@
boolean success = false;
try {
CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
- packed = PACKED == datIn.readByte();
+ type = datIn.readByte();
success = true;
} finally {
if (!success) {
@@ -206,100 +220,21 @@
*/
@Override
public Source load() throws IOException {
- final IndexInput input = (IndexInput) datIn.clone();
boolean success = false;
+ final Source source;
+ IndexInput input = null;
try {
- final Source source = packed ? new PackedIntsSource(input)
- : new FixedIntsSource(input);
+ input = (IndexInput) datIn.clone();
+ input.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
+ source = loadFixedSource(type, input);
success = true;
return source;
} finally {
if (!success) {
- IOUtils.closeSafely(true, datIn);
+ IOUtils.closeSafely(true, input, datIn);
}
}
}
-
- private static class FixedIntsSource extends Source {
- private final long[] values;
- public FixedIntsSource(IndexInput dataIn) throws IOException {
- dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
- final int numDocs = dataIn.readInt();
- values = new long[numDocs];
- for (int i = 0; i < values.length; i++) {
- values[i] = dataIn.readLong();
- }
- }
-
- @Override
- public long getInt(int docID) {
- assert docID >= 0 && docID < values.length;
- return values[docID];
- }
-
- @Override
- public ValueType type() {
- return ValueType.INTS;
- }
-
- @Override
- public ValuesEnum getEnum(AttributeSource attrSource)
- throws IOException {
- return new SourceEnum(attrSource, type(), this, values.length) {
-
- @Override
- public int advance(int target) throws IOException {
- if (target >= numDocs)
- return pos = NO_MORE_DOCS;
- intsRef.ints[intsRef.offset] = values[target];
- return pos = target;
- }
- };
- }
-
- }
-
- private static class PackedIntsSource extends Source {
- private final long minValue;
- private final long defaultValue;
- private final PackedInts.Reader values;
-
- public PackedIntsSource(IndexInput dataIn) throws IOException {
- dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
- minValue = dataIn.readLong();
- defaultValue = dataIn.readLong();
- values = PackedInts.getReader(dataIn);
- }
-
- @Override
- public long getInt(int docID) {
- // TODO -- can we somehow avoid 2X method calls
- // on each get? must push minValue down, and make
- // PackedInts implement Ints.Source
- assert docID >= 0;
- final long value = values.get(docID);
- return value == defaultValue ? 0 : minValue + value;
- }
-
- @Override
- public ValuesEnum getEnum(AttributeSource attrSource)
- throws IOException {
- return new SourceEnum(attrSource, type(), this, values.size()) {
- @Override
- public int advance(int target) throws IOException {
- if (target >= numDocs)
- return pos = NO_MORE_DOCS;
- intsRef.ints[intsRef.offset] = source.getInt(target);
- return pos = target;
- }
- };
- }
-
- @Override
- public ValueType type() {
- return ValueType.INTS;
- }
- }
@Override
public void close() throws IOException {
@@ -312,8 +247,8 @@
final IndexInput input = (IndexInput) datIn.clone();
boolean success = false;
try {
- ValuesEnum inst = packed ? new PackedIntsEnumImpl(source, input)
- : new FixedIntsEnumImpl(source, input);
+ input.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
+ final ValuesEnum inst = directEnum(type, source, input);
success = true;
return inst;
} finally {
@@ -325,77 +260,108 @@
@Override
public ValueType type() {
- return ValueType.INTS;
+ return ValueType.VAR_INTS;
}
-
}
-
- private static final class PackedIntsEnumImpl extends ValuesEnum {
- private final PackedInts.ReaderIterator ints;
- private long minValue;
- private final IndexInput dataIn;
+
+ private static ValuesEnum directEnum(byte ord, AttributeSource attrSource, IndexInput input) throws IOException {
+ switch (ord) {
+ case FIXED_16:
+ return new ShortValues((AtomicLong)null).getDirectEnum(attrSource, input);
+ case FIXED_32:
+ return new IntValues((AtomicLong)null).getDirectEnum(attrSource, input);
+ case FIXED_64:
+ return new LongValues((AtomicLong)null).getDirectEnum(attrSource, input);
+ case FIXED_8:
+ return new ByteValues((AtomicLong)null).getDirectEnum(attrSource, input);
+ case PACKED:
+ return new PackedIntsEnumImpl(attrSource, input);
+ default:
+ throw new IllegalStateException("unknown type ordinal " + ord);
+ }
+ }
+
+ private static IndexDocValues.Source loadFixedSource(byte ord, IndexInput input) throws IOException {
+ switch (ord) {
+ case FIXED_16:
+ return new ShortValues(input);
+ case FIXED_32:
+ return new IntValues(input);
+ case FIXED_64:
+ return new LongValues(input);
+ case FIXED_8:
+ return new ByteValues(input);
+ case PACKED:
+ return new PackedIntsSource(input);
+ default:
+ throw new IllegalStateException("unknown type ordinal " + ord);
+ }
+ }
+
+ static class PackedIntsSource extends Source {
+ private final long minValue;
private final long defaultValue;
- private final int maxDoc;
- private int pos = -1;
+ private final PackedInts.Reader values;
- private PackedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
- throws IOException {
- super(source, ValueType.INTS);
- intsRef.offset = 0;
- this.dataIn = dataIn;
- dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
+ public PackedIntsSource(IndexInput dataIn) throws IOException {
+
minValue = dataIn.readLong();
defaultValue = dataIn.readLong();
- this.ints = PackedInts.getReaderIterator(dataIn);
- maxDoc = ints.size();
- }
-
- @Override
- public void close() throws IOException {
- ints.close();
- dataIn.close();
+ values = PackedInts.getReader(dataIn);
}
@Override
- public int advance(int target) throws IOException {
- if (target >= maxDoc) {
- return pos = NO_MORE_DOCS;
- }
- final long val = ints.advance(target);
- intsRef.ints[intsRef.offset] = val == defaultValue ? 0 : minValue + val;
- return pos = target;
+ public long getInt(int docID) {
+ // TODO -- can we somehow avoid 2X method calls
+ // on each get? must push minValue down, and make
+ // PackedInts implement Ints.Source
+ assert docID >= 0;
+ final long value = values.get(docID);
+ return value == defaultValue ? 0 : minValue + value;
}
@Override
- public int docID() {
- return pos;
+ public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ return new SourceEnum(attrSource, type(), this, values.size()) {
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= numDocs)
+ return pos = NO_MORE_DOCS;
+ intsRef.ints[intsRef.offset] = source.getInt(target);
+ return pos = target;
+ }
+ };
}
@Override
- public int nextDoc() throws IOException {
- if (pos >= maxDoc) {
- return pos = NO_MORE_DOCS;
- }
- return advance(pos + 1);
+ public ValueType type() {
+ return ValueType.VAR_INTS;
}
}
-
- private static final class FixedIntsEnumImpl extends ValuesEnum {
+
+
+ private static final class PackedIntsEnumImpl extends ValuesEnum {
+ private final PackedInts.ReaderIterator ints;
+ private long minValue;
private final IndexInput dataIn;
+ private final long defaultValue;
private final int maxDoc;
private int pos = -1;
- private FixedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
+ private PackedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
throws IOException {
- super(source, ValueType.INTS);
+ super(source, ValueType.VAR_INTS);
intsRef.offset = 0;
this.dataIn = dataIn;
- dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
- maxDoc = dataIn.readInt();
+ minValue = dataIn.readLong();
+ defaultValue = dataIn.readLong();
+ this.ints = PackedInts.getReaderIterator(dataIn);
+ maxDoc = ints.size();
}
@Override
public void close() throws IOException {
+ ints.close();
dataIn.close();
}
@@ -404,11 +370,8 @@
if (target >= maxDoc) {
return pos = NO_MORE_DOCS;
}
- assert target > pos;
- if (target > pos+1) {
- dataIn.seek(dataIn.getFilePointer() + ((target - pos - 1) * 8));
- }
- intsRef.ints[intsRef.offset] = dataIn.readLong();
+ final long val = ints.advance(target);
+ intsRef.ints[intsRef.offset] = val == defaultValue ? 0 : minValue + val;
return pos = target;
}
@@ -425,5 +388,7 @@
return advance(pos + 1);
}
}
+
+
}
\ No newline at end of file
Index: lucene/src/java/org/apache/lucene/index/values/ValueType.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/values/ValueType.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/index/values/ValueType.java (working copy)
@@ -18,8 +18,8 @@
*/
import org.apache.lucene.index.codecs.Codec;
-import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
+import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.PackedInts;
@@ -32,16 +32,40 @@
* @lucene.experimental
*/
public enum ValueType {
- /*
- * TODO: Add INT_32 INT_64 INT_16 & INT_8?!
- */
+
/**
- * A 64 bit integer value. By default this type uses
+ * A variable bit signed integer value. By default this type uses
* {@link PackedInts} to compress the values, as an offset
* from the minimum value, as long as the value range
* fits into 2<sup>63</sup>-1. Otherwise,
* the default implementation falls back to fixed size 64bit
- * integers.
+ * integers ({@link #FIXED_INTS_64}).
+ * <p>
+ * NOTE: this type uses <tt>0</tt> as the default value without any
+ * distinction between provided <tt>0</tt> values during indexing. All
+ * documents without an explicit value will use <tt>0</tt> instead. In turn,
+ * {@link ValuesEnum} instances will not skip documents without an explicit
+ * value assigned. Custom default values must be assigned explicitly.
+ * </p>
+ */
+ VAR_INTS,
+
+ /**
+ * An 8 bit signed integer value. {@link Source} instances of
+ * this type return a <tt>byte</tt> array from {@link Source#getArray()}.
+ * <p>
+ * NOTE: this type uses <tt>0</tt> as the default value without any
+ * distinction between provided <tt>0</tt> values during indexing. All
+ * documents without an explicit value will use <tt>0</tt> instead. In turn,
+ * {@link ValuesEnum} instances will not skip documents without an explicit
+ * value assigned. Custom default values must be assigned explicitly.
+ * </p>
+ */
+ FIXED_INTS_8,
+
+ /**
+ * A 16 bit signed integer value. {@link Source} instances of
+ * this type return a <tt>short</tt> array from {@link Source#getArray()}.
* <p>
* NOTE: this type uses <tt>0</tt> as the default value without any
* distinction between provided <tt>0</tt> values during indexing. All
@@ -50,13 +74,39 @@
* value assigned. Custom default values must be assigned explicitly.
* </p>
*/
- INTS,
+ FIXED_INTS_16,
+
+ /**
+ * A 32 bit signed integer value. {@link Source} instances of
+ * this type return an <tt>int</tt> array from {@link Source#getArray()}.
+ * <p>
+ * NOTE: this type uses <tt>0</tt> as the default value without any
+ * distinction between provided <tt>0</tt> values during indexing. All
+ * documents without an explicit value will use <tt>0</tt> instead. In turn,
+ * {@link ValuesEnum} instances will not skip documents without an explicit
+ * value assigned. Custom default values must be assigned explicitly.
+ * </p>
+ */
+ FIXED_INTS_32,
/**
+ * A 64 bit signed integer value. {@link Source} instances of
+ * this type return a <tt>long</tt> array from {@link Source#getArray()}.
+ * <p>
+ * NOTE: this type uses <tt>0</tt> as the default value without any
+ * distinction between provided <tt>0</tt> values during indexing. All
+ * documents without an explicit value will use <tt>0</tt> instead. In turn,
+ * {@link ValuesEnum} instances will not skip documents without an explicit
+ * value assigned. Custom default values must be assigned explicitly.
+ * </p>
+ */
+ FIXED_INTS_64,
+ /**
* A 32 bit floating point value. By default there is no compression
* applied. To fit custom float values into less than 32bit either a custom
* implementation is needed or values must be encoded into a
- * {@link #BYTES_FIXED_STRAIGHT} type.
+ * {@link #BYTES_FIXED_STRAIGHT} type. {@link Source} instances of
+ * this type return a <tt>float</tt> array from {@link Source#getArray()}.
* <p>
* NOTE: this type uses <tt>0.0f</tt> as the default value without any
* distinction between provided <tt>0.0f</tt> values during indexing. All
@@ -67,10 +117,12 @@
*/
FLOAT_32,
/**
+ *
* A 64 bit floating point value. By default there is no compression
* applied. To fit custom float values into less than 64bit either a custom
* implementation is needed or values must be encoded into a
- * {@link #BYTES_FIXED_STRAIGHT} type.
+ * {@link #BYTES_FIXED_STRAIGHT} type. {@link Source} instances of
+ * this type return a <tt>double</tt> array from {@link Source#getArray()}.
* <p>
* NOTE: this type uses <tt>0.0d</tt> as the default value without any
* distinction between provided <tt>0.0d</tt> values during indexing. All
Index: lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java (working copy)
@@ -71,13 +71,18 @@
case BYTES_VAR_STRAIGHT:
bytesRef = new BytesRef();
break;
- case INTS:
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
+ case VAR_INTS:
intsRef = new LongsRef(1);
break;
case FLOAT_32:
case FLOAT_64:
floatsRef = new FloatsRef(1);
break;
+
}
}
Index: lucene/src/java/org/apache/lucene/index/values/Writer.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/values/Writer.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/index/values/Writer.java (working copy)
@@ -197,8 +197,12 @@
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
}
switch (type) {
- case INTS:
- return Ints.getWriter(directory, id, true, bytesUsed);
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
+ case VAR_INTS:
+ return Ints.getWriter(directory, id, bytesUsed, type);
case FLOAT_32:
return Floats.getWriter(directory, id, 4, bytesUsed);
case FLOAT_64:
@@ -221,6 +225,7 @@
case BYTES_VAR_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false,
bytesUsed);
+
default:
throw new IllegalArgumentException("Unknown Values: " + type);
}
Index: lucene/src/java/org/apache/lucene/search/function/NumericIndexDocValueSource.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/function/NumericIndexDocValueSource.java (revision 1139823)
+++ lucene/src/java/org/apache/lucene/search/function/NumericIndexDocValueSource.java (working copy)
@@ -59,7 +59,7 @@
}
};
- case INTS:
+ case VAR_INTS:
return new DocValues() {
@Override
public String toString(int doc) {
Index: lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java
===================================================================
--- lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java (revision 1139823)
+++ lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java (working copy)
@@ -200,9 +200,21 @@
case FLOAT_64:
docValuesField.setFloat(random.nextDouble());
break;
- case INTS:
+ case VAR_INTS:
+ docValuesField.setInt(random.nextLong());
+ break;
+ case FIXED_INTS_16:
+ docValuesField.setInt(random.nextInt(Short.MAX_VALUE));
+ break;
+ case FIXED_INTS_32:
docValuesField.setInt(random.nextInt());
break;
+ case FIXED_INTS_64:
+ docValuesField.setInt(random.nextLong());
+ break;
+ case FIXED_INTS_8:
+ docValuesField.setInt(random.nextInt(128));
+ break;
default:
throw new IllegalArgumentException("no such type: " + type);
}
Index: lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/values/TestDocValues.java (revision 1139823)
+++ lucene/src/test/org/apache/lucene/index/values/TestDocValues.java (working copy)
@@ -170,76 +170,241 @@
dir.close();
}
- public void testInts() throws IOException {
- long[] maxMin = new long[] {
- Long.MIN_VALUE, Long.MAX_VALUE,
- 1, Long.MAX_VALUE,
- 0, Long.MAX_VALUE,
- -1, Long.MAX_VALUE,
- Long.MIN_VALUE, -1,
- random.nextInt(), random.nextInt() };
- for (int j = 0; j < maxMin.length; j+=2) {
- long maxV = 1;
- final int NUM_VALUES = 777 + random.nextInt(777);
- final long[] values = new long[NUM_VALUES];
- for (int rx = 1; rx < 63; rx++, maxV *= 2) {
- Directory dir = newDirectory();
- final AtomicLong trackBytes = new AtomicLong(0);
- Writer w = Ints.getWriter(dir, "test", false, trackBytes);
- values[0] = maxMin[j];
- w.add(0, values[0]);
- values[1] = maxMin[j+1];
- w.add(1, values[1]);
- for (int i = 2; i < NUM_VALUES; i++) {
- final long v = random.nextLong() % (1 + maxV);
- values[i] = v;
- w.add(i, v);
- }
- final int additionalDocs = 1 + random.nextInt(9);
- w.finish(NUM_VALUES + additionalDocs);
- assertEquals(0, trackBytes.get());
+ public void testVariableIntsLimits() throws IOException {
+ long[][] minMax = new long[][] { { Long.MIN_VALUE, Long.MAX_VALUE },
+ { Long.MIN_VALUE + 1, 1 }, { -1, Long.MAX_VALUE },
+ { Long.MIN_VALUE, -1 }, { 1, Long.MAX_VALUE },
+ { -1, Long.MAX_VALUE - 1 }, { Long.MIN_VALUE + 2, 1 }, };
+ ValueType[] expectedTypes = new ValueType[] { ValueType.FIXED_INTS_64,
+ ValueType.FIXED_INTS_64, ValueType.FIXED_INTS_64,
+ ValueType.FIXED_INTS_64, ValueType.VAR_INTS, ValueType.VAR_INTS,
+ ValueType.VAR_INTS, };
+ for (int i = 0; i < minMax.length; i++) {
+ Directory dir = newDirectory();
+ final AtomicLong trackBytes = new AtomicLong(0);
+ Writer w = Ints.getWriter(dir, "test", trackBytes, ValueType.VAR_INTS);
+ w.add(0, minMax[i][0]);
+ w.add(1, minMax[i][1]);
+ w.finish(2);
+ assertEquals(0, trackBytes.get());
+ IndexDocValues r = Ints.getValues(dir, "test");
+ Source source = getSource(r);
+ assertEquals(i + " with min: " + minMax[i][0] + " max: " + minMax[i][1],
+ expectedTypes[i], source.type());
+ assertEquals(minMax[i][0], source.getInt(0));
+ assertEquals(minMax[i][1], source.getInt(1));
+ ValuesEnum iEnum = getEnum(r);
+ assertEquals(i + " with min: " + minMax[i][0] + " max: " + minMax[i][1],
+ expectedTypes[i], iEnum.type());
+ assertEquals(0, iEnum.nextDoc());
+ assertEquals(minMax[i][0], iEnum.intsRef.get());
+ assertEquals(1, iEnum.nextDoc());
+ assertEquals(minMax[i][1], iEnum.intsRef.get());
+ assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
- IndexDocValues r = Ints.getValues(dir, "test", false);
- for (int iter = 0; iter < 2; iter++) {
- Source s = getSource(r);
- for (int i = 0; i < NUM_VALUES; i++) {
- final long v = s.getInt(i);
- assertEquals("index " + i, values[i], v);
- }
+ r.close();
+ dir.close();
+ }
+ }
+
+ public void testVInts() throws IOException {
+ testInts(ValueType.VAR_INTS, 63);
+ }
+
+ public void testFixedInts() throws IOException {
+ testInts(ValueType.FIXED_INTS_64, 63);
+ testInts(ValueType.FIXED_INTS_32, 31);
+ testInts(ValueType.FIXED_INTS_16, 15);
+ testInts(ValueType.FIXED_INTS_8, 7);
+
+ }
+
+ public void testGetInt8Array() throws IOException {
+ byte[] sourceArray = new byte[] {1,2,3};
+ Directory dir = newDirectory();
+ final AtomicLong trackBytes = new AtomicLong(0);
+ Writer w = Ints.getWriter(dir, "test", trackBytes, ValueType.FIXED_INTS_8);
+ for (int i = 0; i < sourceArray.length; i++) {
+ w.add(i, (long) sourceArray[i]);
+ }
+ w.finish(sourceArray.length);
+ IndexDocValues r = Ints.getValues(dir, "test");
+ Source source = r.getSource();
+ assertTrue(source.hasArray());
+ byte[] loaded = ((byte[])source.getArray());
+ assertEquals(loaded.length, sourceArray.length);
+ for (int i = 0; i < loaded.length; i++) {
+ assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i]);
+ }
+ r.close();
+ dir.close();
+ }
+
+ public void testGetInt16Array() throws IOException {
+ short[] sourceArray = new short[] {1,2,3};
+ Directory dir = newDirectory();
+ final AtomicLong trackBytes = new AtomicLong(0);
+ Writer w = Ints.getWriter(dir, "test", trackBytes, ValueType.FIXED_INTS_16);
+ for (int i = 0; i < sourceArray.length; i++) {
+ w.add(i, (long) sourceArray[i]);
+ }
+ w.finish(sourceArray.length);
+ IndexDocValues r = Ints.getValues(dir, "test");
+ Source source = r.getSource();
+ assertTrue(source.hasArray());
+ short[] loaded = ((short[])source.getArray());
+ assertEquals(loaded.length, sourceArray.length);
+ for (int i = 0; i < loaded.length; i++) {
+ assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i]);
+ }
+ r.close();
+ dir.close();
+ }
+
+ public void testGetInt64Array() throws IOException {
+ long[] sourceArray = new long[] {1,2,3};
+ Directory dir = newDirectory();
+ final AtomicLong trackBytes = new AtomicLong(0);
+ Writer w = Ints.getWriter(dir, "test", trackBytes, ValueType.FIXED_INTS_64);
+ for (int i = 0; i < sourceArray.length; i++) {
+ w.add(i, sourceArray[i]);
+ }
+ w.finish(sourceArray.length);
+ IndexDocValues r = Ints.getValues(dir, "test");
+ Source source = r.getSource();
+ assertTrue(source.hasArray());
+ long[] loaded = ((long[])source.getArray());
+ assertEquals(loaded.length, sourceArray.length);
+ for (int i = 0; i < loaded.length; i++) {
+ assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i]);
+ }
+ r.close();
+ dir.close();
+ }
+
+ public void testGetInt32Array() throws IOException {
+ int[] sourceArray = new int[] {1,2,3};
+ Directory dir = newDirectory();
+ final AtomicLong trackBytes = new AtomicLong(0);
+ Writer w = Ints.getWriter(dir, "test", trackBytes, ValueType.FIXED_INTS_32);
+ for (int i = 0; i < sourceArray.length; i++) {
+ w.add(i, (long) sourceArray[i]);
+ }
+ w.finish(sourceArray.length);
+ IndexDocValues r = Ints.getValues(dir, "test");
+ Source source = r.getSource();
+ assertTrue(source.hasArray());
+ int[] loaded = ((int[])source.getArray());
+ assertEquals(loaded.length, sourceArray.length);
+ for (int i = 0; i < loaded.length; i++) {
+ assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i]);
+ }
+ r.close();
+ dir.close();
+ }
+
+ public void testGetFloat32Array() throws IOException {
+ float[] sourceArray = new float[] {1,2,3};
+ Directory dir = newDirectory();
+ final AtomicLong trackBytes = new AtomicLong(0);
+ Writer w = Floats.getWriter(dir, "test", 4, trackBytes);
+ for (int i = 0; i < sourceArray.length; i++) {
+ w.add(i, sourceArray[i]);
+ }
+ w.finish(sourceArray.length);
+ IndexDocValues r = Floats.getValues(dir, "test", 3);
+ Source source = r.getSource();
+ assertTrue(source.hasArray());
+ float[] loaded = ((float[])source.getArray());
+ assertEquals(loaded.length, sourceArray.length);
+ for (int i = 0; i < loaded.length; i++) {
+ assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i], 0.0f);
+ }
+ r.close();
+ dir.close();
+ }
+
+ public void testGetFloat64Array() throws IOException {
+ double[] sourceArray = new double[] {1,2,3};
+ Directory dir = newDirectory();
+ final AtomicLong trackBytes = new AtomicLong(0);
+ Writer w = Floats.getWriter(dir, "test", 8, trackBytes);
+ for (int i = 0; i < sourceArray.length; i++) {
+ w.add(i, sourceArray[i]);
+ }
+ w.finish(sourceArray.length);
+ IndexDocValues r = Floats.getValues(dir, "test", 3);
+ Source source = r.getSource();
+ assertTrue(source.hasArray());
+ double[] loaded = ((double[])source.getArray());
+ assertEquals(loaded.length, sourceArray.length);
+ for (int i = 0; i < loaded.length; i++) {
+ assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i], 0.0d);
+ }
+ r.close();
+ dir.close();
+ }
+
+ private void testInts(ValueType type, int maxBit) throws IOException {
+ long maxV = 1;
+ final int NUM_VALUES = 333 + random.nextInt(333);
+ final long[] values = new long[NUM_VALUES];
+ for (int rx = 1; rx < maxBit; rx++, maxV *= 2) {
+ Directory dir = newDirectory();
+ final AtomicLong trackBytes = new AtomicLong(0);
+ Writer w = Ints.getWriter(dir, "test", trackBytes, type);
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final long v = random.nextLong() % (1 + maxV);
+ values[i] = v;
+ w.add(i, v);
+ }
+ final int additionalDocs = 1 + random.nextInt(9);
+ w.finish(NUM_VALUES + additionalDocs);
+ assertEquals(0, trackBytes.get());
+
+ IndexDocValues r = Ints.getValues(dir, "test");
+ for (int iter = 0; iter < 2; iter++) {
+ Source s = getSource(r);
+ assertEquals(type, s.type());
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final long v = s.getInt(i);
+ assertEquals("index " + i, values[i], v);
}
+ }
- for (int iter = 0; iter < 2; iter++) {
- ValuesEnum iEnum = getEnum(r);
- LongsRef ints = iEnum.getInt();
- for (int i = 0; i < NUM_VALUES + additionalDocs; i++) {
- assertEquals(i, iEnum.nextDoc());
- if (i < NUM_VALUES) {
- assertEquals(values[i], ints.get());
- } else {
- assertEquals(0, ints.get());
- }
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum iEnum = getEnum(r);
+ assertEquals(type, iEnum.type());
+ LongsRef ints = iEnum.getInt();
+ for (int i = 0; i < NUM_VALUES + additionalDocs; i++) {
+ assertEquals(i, iEnum.nextDoc());
+ if (i < NUM_VALUES) {
+ assertEquals(values[i], ints.get());
+ } else {
+ assertEquals(0, ints.get());
}
- assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
- iEnum.close();
}
+ assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
+ iEnum.close();
+ }
- for (int iter = 0; iter < 2; iter++) {
- ValuesEnum iEnum = getEnum(r);
- LongsRef ints = iEnum.getInt();
- for (int i = 0; i < NUM_VALUES + additionalDocs; i += 1 + random.nextInt(25)) {
- assertEquals(i, iEnum.advance(i));
- if (i < NUM_VALUES) {
- assertEquals(values[i], ints.get());
- } else {
- assertEquals(0, ints.get());
- }
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum iEnum = getEnum(r);
+ assertEquals(type, iEnum.type());
+ LongsRef ints = iEnum.getInt();
+ for (int i = 0; i < NUM_VALUES + additionalDocs; i += 1 + random.nextInt(25)) {
+ assertEquals(i, iEnum.advance(i));
+ if (i < NUM_VALUES) {
+ assertEquals(values[i], ints.get());
+ } else {
+ assertEquals(0, ints.get());
}
- assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.advance(NUM_VALUES + additionalDocs));
- iEnum.close();
}
- r.close();
- dir.close();
+ assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.advance(NUM_VALUES + additionalDocs));
+ iEnum.close();
}
+ r.close();
+ dir.close();
}
}
Index: lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java (revision 1139823)
+++ lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java (working copy)
@@ -113,44 +113,20 @@
dir.close();
}
- /**
- * Tests complete indexing of {@link ValueType} including deletions, merging and
- * sparse value fields on Compound-File
- */
- public void testIndexBytesNoDeletesCFS() throws IOException {
- runTestIndexBytes(writerConfig(true), false);
- }
-
- public void testIndexBytesDeletesCFS() throws IOException {
- runTestIndexBytes(writerConfig(true), true);
- }
-
- public void testIndexNumericsNoDeletesCFS() throws IOException {
- runTestNumerics(writerConfig(true), false);
- }
-
- public void testIndexNumericsDeletesCFS() throws IOException {
- runTestNumerics(writerConfig(true), true);
- }
-
- /**
- * Tests complete indexing of {@link ValueType} including deletions, merging and
- * sparse value fields on None-Compound-File
- */
public void testIndexBytesNoDeletes() throws IOException {
- runTestIndexBytes(writerConfig(false), false);
+ runTestIndexBytes(writerConfig(random.nextBoolean()), false);
}
public void testIndexBytesDeletes() throws IOException {
- runTestIndexBytes(writerConfig(false), true);
+ runTestIndexBytes(writerConfig(random.nextBoolean()), true);
}
public void testIndexNumericsNoDeletes() throws IOException {
- runTestNumerics(writerConfig(false), false);
+ runTestNumerics(writerConfig(random.nextBoolean()), false);
}
public void testIndexNumericsDeletes() throws IOException {
- runTestNumerics(writerConfig(false), true);
+ runTestNumerics(writerConfig(random.nextBoolean()), true);
}
public void testAddIndexes() throws IOException {
@@ -204,7 +180,11 @@
case BYTES_VAR_STRAIGHT:
case FLOAT_32:
case FLOAT_64:
- case INTS:
+ case VAR_INTS:
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
assertEquals(msg, valuesPerIndex-1, vE_2_merged.advance(valuesPerIndex-1));
}
@@ -246,7 +226,7 @@
throws IOException {
Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, cfg);
- final int numValues = 179 + random.nextInt(151);
+ final int numValues = 50 + atLeast(10);
final List<ValueType> numVariantList = new ArrayList<ValueType>(NUMERICS);
// run in random order to test if fill works correctly during merges
@@ -258,8 +238,16 @@
IndexReader r = IndexReader.open(w, true);
final int numRemainingValues = (int) (numValues - deleted.cardinality());
final int base = r.numDocs() - numRemainingValues;
+ // for FIXED_INTS_8 we use value mod 128 - to enable testing in
+ // one go we simply use numValues as the mod for all other INT types
+ int mod = numValues;
switch (val) {
- case INTS: {
+ case FIXED_INTS_8:
+ mod = 128;
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case VAR_INTS: {
IndexDocValues intsReader = getDocValues(r, val.name());
assertNotNull(intsReader);
@@ -283,8 +271,8 @@
}
assertEquals("advance failed at index: " + i + " of " + r.numDocs()
+ " docs", i, intsEnum.advance(i));
- assertEquals(expected, ints.getInt(i));
- assertEquals(expected, enumRef.get());
+ assertEquals(val + "" + mod + " " + i, expected%mod, ints.getInt(i));
+ assertEquals(expected%mod, enumRef.get());
}
}
@@ -338,11 +326,11 @@
final List<ValueType> byteVariantList = new ArrayList<ValueType>(BYTES);
// run in random order to test if fill works correctly during merges
Collections.shuffle(byteVariantList, random);
- final int numValues = 179 + random.nextInt(151);
+ final int numValues = 50 + atLeast(10);
for (ValueType byteIndexValue : byteVariantList) {
List<Closeable> closeables = new ArrayList<Closeable>();
- int bytesSize = 1 + random.nextInt(128);
+ int bytesSize = 1 + atLeast(10);
OpenBitSet deleted = indexValues(w, numValues, byteIndexValue,
byteVariantList, withDeletions, bytesSize);
final IndexReader r = IndexReader.open(w, withDeletions);
@@ -485,8 +473,12 @@
ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_FIXED_STRAIGHT, ValueType.BYTES_VAR_DEREF,
ValueType.BYTES_VAR_SORTED, ValueType.BYTES_VAR_STRAIGHT);
- private static EnumSet<ValueType> NUMERICS = EnumSet.of(ValueType.INTS,
- ValueType.FLOAT_32, ValueType.FLOAT_64);
+ private static EnumSet<ValueType> NUMERICS = EnumSet.of(ValueType.VAR_INTS,
+ ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32,
+ ValueType.FIXED_INTS_64,
+ ValueType.FIXED_INTS_8,
+ ValueType.FLOAT_32,
+ ValueType.FLOAT_64);
private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED,
Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS,
@@ -517,8 +509,20 @@
for (int i = 0; i < numValues; i++) {
if (isNumeric) {
switch (value) {
- case INTS:
- valField.setInt(i);
+ case VAR_INTS:
+ valField.setInt((long)i);
+ break;
+ case FIXED_INTS_16:
+ valField.setInt((short)i, random.nextInt(10) != 0);
+ break;
+ case FIXED_INTS_32:
+ valField.setInt(i, random.nextInt(10) != 0);
+ break;
+ case FIXED_INTS_64:
+ valField.setInt((long)i, random.nextInt(10) != 0);
+ break;
+ case FIXED_INTS_8:
+ valField.setInt((byte)(0xFF & (i % 128)), random.nextInt(10) != 0);
break;
case FLOAT_32:
valField.setFloat(2.0f * i);
@@ -526,6 +530,7 @@
case FLOAT_64:
valField.setFloat(2.0d * i);
break;
+
default:
fail("unexpected value " + value);
}
Index: lucene/src/test/org/apache/lucene/search/TestSort.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestSort.java (revision 1139823)
+++ lucene/src/test/org/apache/lucene/search/TestSort.java (working copy)
@@ -124,7 +124,7 @@
doc.add (new Field ("contents", data[i][1], Field.Store.NO, Field.Index.ANALYZED));
if (data[i][2] != null) {
Field f = supportsDocValues ?
- IndexDocValuesField.set(new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.INTS)
+ IndexDocValuesField.set(new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.VAR_INTS)
: new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED);
doc.add(f);
}