| Index: src/java/org/apache/lucene/util/packed/PackedInts.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedInts.java Fri Feb 12 02:52:38 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedInts.java Fri Feb 12 02:52:38 CET 2010 |
| @@ -0,0 +1,386 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +// nocommit -- rename to UnsignedPackedInts? or pull |
| +// minValue down |
| + |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.CodecUtil; |
| +import org.apache.lucene.util.Constants; |
| +import org.apache.lucene.util.ConsumesRAM; |
| + |
| +import java.io.IOException; |
| + |
| +/** |
| + * Simplistic compression for array of long values, where |
| + * each value is >= 0 and <= a specified maximum value. The |
| + * values are stored as packed ints, with each value |
| + * consuming a fixed number of bits. |
| + * |
| + * <p>NOTE: this class is meant only to be used internally |
| + * by Lucene; it's only public so it can be shared across |
| + * packages. This means the API is freely subject to |
| + * change, and, the class could be removed entirely, in any |
| + * Lucene release. Use directly at your own risk! |
| + */ |
| + |
| +// nocommit |
| +// - do we need int/long variants (for perf)? or long |
| +// only suffices? |
| +// - what native type is best perf? long/int/short/byte? |
| + |
| +public class PackedInts { |
| + |
| + private final static String CODEC_NAME = "PackedInts"; |
| + private final static int VERSION_START = 0; |
| + private final static int VERSION_CURRENT = 0; |
| + |
| + /** |
| + * The priority for selecting the Reader and Writer implementation. |
| + * </p><p> |
| + * packed: Pack the bits right after each other.<br /> |
| + * aligned: Pack bits so that no values cross block boundaries.<br /> |
| + * auto: Guesstimate the best implementation. |
| + * </p><p> |
| + * Note: When a more efficient structure (in terms of memory as well as speed) |
| + * can be substituted without penalty, this will be done. Example: |
| + * Asking for packed with 3 bits/value will return packed32 or packed64, while |
| + * asking for packed with 4 bits/value will return aligned32 or aligned64. |
| + * Asking for aligned with 7 bits/value and block preference bit32 will |
| + * return directByte, as the amount of space used by an aligned32 with 7 |
| + * bits/value is the same as for directByte, while directByte is less |
| + * processor-intensive. |
| + * </p><p> |
| + * Note: 63 bits/value will always be mapped to a directLong, due to the |
| + * problem of stating maxValues > 2^63-1. |
| + */ |
| + public enum PRIORITY {packed, aligned, auto} |
| + |
| + /** |
| + * The preference for the underlying blocks for packed or aligned structures. |
| + * Using 64bit blocks (longs) on a 32bit machine is slower than using 32bit |
| + * blocks (ints). |
| + */ |
| + public enum BLOCK_PREFERENCE {bit32(32), bit64(64); |
| + private int bits; |
| + BLOCK_PREFERENCE(int bits) { |
| + this.bits = bits; |
| + } |
| + public int getBits() { |
| + return bits; |
| + } |
| + } |
| + |
| + /** |
| + * The specific implementation derived from bits/value, PRIORITY and |
| + * BLOCK_PREFERENCE. |
| + */ |
| + private enum IMPLEMENTATION {packed32, packed64, aligned32, aligned64, |
| + directByte, directShort, directInt, directLong} |
| + |
| + /** |
| + * The layout of stored values. The name of the constant is written right |
| + * after the codec header by Writer and parsed back by getReader. |
| + */ |
| + enum PERSISTENCE {packed, aligned32, aligned64} |
| + |
| + /** |
| + * Derives the optimal IMPLEMENTATION based on the given preferences. |
| + * <p> |
| + * The requested bitsPerValue may first be rounded up to 8, 16, 32 or 64 |
| + * (see getNextFixedSize): for priority aligned this happens at 7 and from |
| + * 11 bits upwards with 32 bit blocks, and at 13-15 and from 22 bits upwards |
| + * with 64 bit blocks; for priority auto it happens at 30-31 and above 58 |
| + * bits. Afterwards 8, 16, 31/32 and 63/64 bits map to the direct |
| + * implementations; priority aligned, or 1, 2 and 4 bits, map to an aligned |
| + * implementation; everything else maps to a packed one. The 32 bit variants |
| + * are only chosen when block is bit32 and fewer than 32 bits are needed. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * @param priority memory/speed trade-off. |
| + * @param block the expected architecture for the system that will |
| + * use the Reader-part of the structure. |
| + * @return the implementation to use. |
| + */ |
| + private static IMPLEMENTATION getImplementation( |
| + int bitsPerValue, PRIORITY priority, BLOCK_PREFERENCE block) { |
| + switch (priority) { |
| + case aligned: { |
| + if (block == BLOCK_PREFERENCE.bit32) { |
| + if (bitsPerValue == 7 || bitsPerValue >= 11) { |
| + bitsPerValue = getNextFixedSize(bitsPerValue); // Round up to the width of a byte, short, int or long |
| + } |
| + } else { |
| + if ((bitsPerValue >= 13 && bitsPerValue <= 15) || |
| + (bitsPerValue >= 22)) { |
| + bitsPerValue = getNextFixedSize(bitsPerValue); // Round up to the width of a short, int or long |
| + } |
| + } |
| + } |
| + } |
| + if (priority == PRIORITY.auto) { |
| + if (bitsPerValue > 58 || (bitsPerValue < 32 && bitsPerValue > 29)) { // Trading roughly 10% space for a direct array is accepted |
| + bitsPerValue = getNextFixedSize(bitsPerValue); |
| + } |
| + } |
| + |
| + switch (bitsPerValue) { // Widths that map straight onto a primitive array |
| + case 8: return IMPLEMENTATION.directByte; |
| + case 16: return IMPLEMENTATION.directShort; |
| + case 31: |
| + case 32: return IMPLEMENTATION.directInt; |
| + case 63: |
| + case 64: return IMPLEMENTATION.directLong; |
| + } |
| + |
| + if (priority == PRIORITY.aligned || |
| + bitsPerValue == 1 || bitsPerValue == 2 || bitsPerValue == 4) { |
| + if (block == BLOCK_PREFERENCE.bit32 && bitsPerValue < 32) { |
| + return IMPLEMENTATION.aligned32; |
| + } |
| + return IMPLEMENTATION.aligned64; |
| + } |
| + return block == BLOCK_PREFERENCE.bit32 && bitsPerValue < 32 ? |
| + IMPLEMENTATION.packed32 : IMPLEMENTATION.packed64; |
| + } |
| + |
| + /** |
| + * Derives the optimal IMPLEMENTATION based on the given preferences. |
| + * Used for selecting the correct implementation from persistent data. |
| + * @param persistence the format of the existing data. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * @param block the expected architecture for the system that will |
| + * use the Reader-part of the structure. |
| + * @return the implementation to use. |
| + */ |
| + private static IMPLEMENTATION getImplementation( |
| + PERSISTENCE persistence, int bitsPerValue, BLOCK_PREFERENCE block) { |
| + switch (bitsPerValue) { // The safe choices |
| + case 1: |
| + case 2: |
| + case 4: { |
| + if (block == BLOCK_PREFERENCE.bit32) { |
| + return IMPLEMENTATION.aligned32; |
| + } |
| + return IMPLEMENTATION.aligned64; |
| + } |
| + case 8: return IMPLEMENTATION.directByte; |
| + case 16: return IMPLEMENTATION.directShort; |
| + case 31: |
| + case 32: return IMPLEMENTATION.directInt; |
| + case 63: |
| + case 64: return IMPLEMENTATION.directLong; |
| + } |
| + if (persistence == PERSISTENCE.aligned32) { |
| + return IMPLEMENTATION.aligned32; |
| + } else if (persistence == PERSISTENCE.aligned64) { |
| + return IMPLEMENTATION.aligned64; |
| + } |
| + return block == BLOCK_PREFERENCE.bit32 && bitsPerValue < 32 ? |
| + IMPLEMENTATION.packed32 : IMPLEMENTATION.packed64; |
| + } |
| + |
| + /** |
| +  * Returns how many bits are required to hold values up |
| +  * to and including maxValue. |
| +  * @param maxValue the largest value that must be representable; expected |
| +  *        to be >= 0. |
| +  * @return the number of bits: 0 for maxValue 0, up to 63 for |
| +  *         Long.MAX_VALUE. A negative maxValue yields 64. |
| +  */ |
| + public static int bitsRequired(long maxValue) { |
| +   // Counting leading zeros is exact for every long. A Math.log based |
| +   // formula has to go through double, which cannot represent 1+maxValue |
| +   // exactly above 2^53 and whose rounding can be off by one bit close to |
| +   // powers of two. |
| +   return 64 - Long.numberOfLeadingZeros(maxValue); |
| + } |
| + |
| + /** |
| +  * Computes the largest value that fits into the given number of bits. |
| +  * @param bitsPerValue the number of bits available for any given value. |
| +  * @return the maximum value for the given bits; Long.MAX_VALUE is returned |
| +  *         for 64 bits. |
| +  */ |
| + public static long maxValue(int bitsPerValue) { |
| +   if (bitsPerValue == 64) { |
| +     return Long.MAX_VALUE; |
| +   } |
| +   final long aboveValueBits = ~0L << bitsPerValue; // ones everywhere but the value bits |
| +   return ~aboveValueBits; |
| + } |
| + |
| + // Rounds bits up to the next of 8, 16, 32 or 64; anything above 32 gives 64. |
| + private static int getNextFixedSize(int bits) { |
| +   if (bits > 32) { |
| +     return 64; |
| +   } |
| +   if (bits > 16) { |
| +     return 32; |
| +   } |
| +   return bits > 8 ? 16 : 8; |
| + } |
| + |
| + /** Write-once. |
| + * The constructor immediately writes the codec header followed by the name |
| + * of the persistence format, bitsPerValue and valueCount to the given |
| + * output; subclasses supply add and finish. |
| + */ |
| + public static abstract class Writer { |
| + protected final IndexOutput out; |
| + protected final int bitsPerValue; |
| + protected final int valueCount; |
| + |
| + protected Writer(IndexOutput out, int valueCount, |
| + int bitsPerValue, PERSISTENCE persistence) |
| + throws IOException { |
| + assert bitsPerValue <= 64; |
| + |
| + this.out = out; |
| + this.valueCount = valueCount; |
| + this.bitsPerValue = bitsPerValue; |
| + CodecUtil.writeHeader(out, CODEC_NAME, VERSION_START); |
| + out.writeString(persistence.toString()); |
| + out.writeVInt(bitsPerValue); |
| + out.writeVInt(valueCount); |
| +// System.out.println("Writer PERSISTENCE: " + persistence + " bitsPerValue: " + bitsPerValue); |
| + } |
| + |
| + public abstract void add(long v) throws IOException; |
| + public abstract void finish() throws IOException; |
| + } |
| + |
| + /** |
| +  * Creates a Writer for the implementation selected by getImplementation. |
| +  * Packed and direct implementations share PackedWriter (direct ones with 8, |
| +  * 16, 32 or 64 bits per value); aligned ones use PackedAlignedWriter. |
| +  * @param out where the values are written. |
| +  * @param valueCount the number of values that will be added. |
| +  * @param bitsPerValue the number of bits available for any given value. |
| +  * @param priority memory/speed trade-off. |
| +  * @param block the block size preferred by the expected reader. |
| +  * @return a Writer for the given preferences. |
| +  * @throws IOException if the writer could not be created. |
| +  */ |
| + public static Writer getWriter( |
| +     IndexOutput out, int valueCount, int bitsPerValue, |
| +     PRIORITY priority, BLOCK_PREFERENCE block) throws IOException { |
| +   final IMPLEMENTATION chosen = getImplementation(bitsPerValue, priority, block); |
| +   final int packedBits; |
| +   switch (chosen) { |
| +     case aligned32: |
| +       return new PackedAlignedWriter( |
| +           out, valueCount, bitsPerValue, BLOCK_PREFERENCE.bit32); |
| +     case aligned64: |
| +       return new PackedAlignedWriter( |
| +           out, valueCount, bitsPerValue, BLOCK_PREFERENCE.bit64); |
| +     case packed32: |
| +     case packed64: |
| +       packedBits = bitsPerValue; |
| +       break; |
| +     case directByte: |
| +       packedBits = 8; |
| +       break; |
| +     case directShort: |
| +       packedBits = 16; |
| +       break; |
| +     case directInt: |
| +       packedBits = 32; |
| +       break; |
| +     case directLong: |
| +       packedBits = 64; |
| +       break; |
| +     default: |
| +       throw new UnsupportedOperationException( |
| +           chosen + " is not implemented yet"); |
| +   } |
| +   return new PackedWriter(out, valueCount, packedBits); |
| + } |
| + |
| + /** |
| +  * Creates an empty in-memory structure of the implementation selected by |
| +  * getImplementation. |
| +  * @param valueCount the number of values. |
| +  * @param bitsPerValue the number of bits available for any given value. |
| +  * @param priority memory/speed trade-off. |
| +  * @param block the preferred block size. |
| +  * @return a Mutable for the given preferences. |
| +  */ |
| + public static Mutable getMutable( |
| +     int valueCount, int bitsPerValue, |
| +     PRIORITY priority, BLOCK_PREFERENCE block) throws IOException { |
| +   final IMPLEMENTATION chosen = getImplementation(bitsPerValue, priority, block); |
| +   final Mutable values; |
| +   switch (chosen) { |
| +     case packed32: |
| +       values = new Packed32(valueCount, bitsPerValue); |
| +       break; |
| +     case packed64: |
| +       values = new Packed64(valueCount, bitsPerValue); |
| +       break; |
| +     case directByte: |
| +       values = new PackedDirectByte(valueCount); |
| +       break; |
| +     case directShort: |
| +       values = new PackedDirectShort(valueCount); |
| +       break; |
| +     case directInt: |
| +       values = new PackedDirectInt(valueCount); |
| +       break; |
| +     case directLong: |
| +       values = new PackedDirectLong(valueCount); |
| +       break; |
| +     case aligned32: |
| +       values = new PackedAligned32(valueCount, bitsPerValue); |
| +       break; |
| +     case aligned64: |
| +       values = new PackedAligned64(valueCount, bitsPerValue); |
| +       break; |
| +     default: |
| +       throw new UnsupportedOperationException( |
| +           chosen + " is not implemented yet"); |
| +   } |
| +   return values; |
| + } |
| + |
| + public static interface Reader extends ConsumesRAM { |
| + /** |
| + * @param index the position of the wanted value. |
| + * @return the value at the stated index. |
| + */ |
| + long get(int index); |
| + |
| + /** |
| + * @return the number of bits used to store any given value. |
| + * Note: This does not imply that memory usage is |
| + * {@code bitsPerValue * #values} as implementations are free to |
| + * use non-space-optimal packing of bits. |
| + */ |
| + int getBitsPerValue(); |
| + |
| + /** |
| + * @return the number of values. |
| + */ |
| + int size(); |
| + } |
| + |
| + /** |
| + * A packed integer array that can be modified. |
| + */ |
| + public static interface Mutable extends Reader { |
| + /** |
| + * Set the value at the given index in the array. |
| + * @param index where the value should be positioned. |
| + * @param value a value conforming to the constraints set by the array. |
| + */ |
| + void set(int index, long value); |
| + |
| + /** |
| + * Sets all values to 0. |
| + */ |
| + void clear(); |
| + } |
| + |
| + public static abstract class ReaderImpl implements Reader { |
| + protected final int bitsPerValue; |
| + protected final int valueCount; |
| + |
| + protected ReaderImpl(int valueCount, int bitsPerValue) { |
| + this.bitsPerValue = bitsPerValue; |
| + this.valueCount = valueCount; |
| + } |
| + |
| + public int getBitsPerValue() { |
| + return bitsPerValue; |
| + } |
| + |
| + public int size() { |
| + return valueCount; |
| + } |
| + |
| + public long getMaxValue() { // Convenience method |
| + return maxValue(bitsPerValue); |
| + } |
| + } |
| + |
| + /** |
| +  * Creates a Reader using 64 bit blocks when Constants.JRE_IS_64BIT is set |
| +  * and 32 bit blocks otherwise. |
| +  * @param in positioned at the start of the header of the stored values. |
| +  * @return a Reader for the stored values. |
| +  * @throws IOException if the values could not be read. |
| +  */ |
| + public static Reader getReader(IndexInput in) throws IOException { |
| +   final BLOCK_PREFERENCE nativeBlock; |
| +   if (Constants.JRE_IS_64BIT) { |
| +     nativeBlock = BLOCK_PREFERENCE.bit64; |
| +   } else { |
| +     nativeBlock = BLOCK_PREFERENCE.bit32; |
| +   } |
| +   return getReader(in, nativeBlock); |
| + } |
| + /** |
| +  * Checks the codec header, reads the persistence name, bitsPerValue and |
| +  * valueCount and then loads the values with a matching implementation. |
| +  * @param in positioned at the start of the header of the stored values. |
| +  * @param block the preferred block size for the returned structure. |
| +  * @return a Reader for the stored values. |
| +  * @throws IOException if the header or the values could not be read. |
| +  */ |
| + public static Reader getReader(IndexInput in, BLOCK_PREFERENCE block) |
| +     throws IOException { |
| +   CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START); |
| +   final PERSISTENCE persistence = PERSISTENCE.valueOf(in.readString()); |
| +   final int bitsPerValue = in.readVInt(); |
| +   final int valueCount = in.readVInt(); |
| + |
| +   final IMPLEMENTATION chosen = |
| +       getImplementation(persistence, bitsPerValue, block); |
| +   final Reader values; |
| +   switch (chosen) { |
| +     case packed32: |
| +       values = new Packed32(in, valueCount, bitsPerValue); |
| +       break; |
| +     case packed64: |
| +       values = new Packed64(in, valueCount, bitsPerValue); |
| +       break; |
| +     case aligned32: |
| +       values = new PackedAligned32(in, valueCount, bitsPerValue); |
| +       break; |
| +     case aligned64: |
| +       values = new PackedAligned64(in, valueCount, bitsPerValue); |
| +       break; |
| +     case directByte: |
| +       values = new PackedDirectByte(in, valueCount); |
| +       break; |
| +     case directShort: |
| +       values = new PackedDirectShort(in, valueCount); |
| +       break; |
| +     case directInt: |
| +       values = new PackedDirectInt(in, valueCount); |
| +       break; |
| +     case directLong: |
| +       values = new PackedDirectLong(in, valueCount); |
| +       break; |
| +     default: |
| +       throw new UnsupportedOperationException("Not implemented yet"); |
| +   } |
| +   return values; |
| + |
| +   // TODO an mmap reader as well? |
| + } |
| +} |
| Index: src/java/org/apache/lucene/util/CodecUtil.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/CodecUtil.java Fri Jan 22 12:58:35 CET 2010 |
| +++ src/java/org/apache/lucene/util/CodecUtil.java Fri Jan 22 12:58:35 CET 2010 |
| @@ -0,0 +1,72 @@ |
| +package org.apache.lucene.util; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| + |
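| +/** |
| + * Utility methods for writing and checking the header that identifies the |
| + * codec and version of stored data. |
| + * |
| + * <p>NOTE: this class is meant only to be used internally |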
| + * by Lucene; it's only public so it can be shared across |
| + * packages. This means the API is freely subject to |
| + * change, and, the class could be removed entirely, in any |
| + * Lucene release. Use directly at your own risk! |
| + */ |
| + |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.index.CorruptIndexException; |
| + |
| +import java.io.IOException; |
| + |
| +public final class CodecUtil { |
| + private final static int CODEC_MAGIC = 0x3fd76c17; |
| + |
| + public static void writeHeader(IndexOutput out, String codec, int version) |
| + throws IOException { |
| + final long start = out.getFilePointer(); |
| + out.writeInt(CODEC_MAGIC); |
| + out.writeString(codec); |
| + out.writeInt(version); |
| + |
| + // We require this so we can easily pre-compute header length |
| + if (out.getFilePointer()-start != codec.length()+9) { |
| + throw new IllegalArgumentException("codec must be simple ASCII, less than 128 characters in length [got " + codec + "]"); |
| + } |
| + } |
| + |
| + /** |
| +  * Computes the length of the header written by writeHeader. |
| +  * @param codec the codec name used in the header. |
| +  * @return the number of characters in codec plus 9. |
| +  */ |
| + public static int headerLength(String codec) { |
| +   final int nameLength = codec.length(); |
| +   return nameLength + 9; |
| + } |
| + |
| + /** |
| +  * Reads a header and verifies the magic number, the codec name and that |
| +  * the version is not newer than maxVersion. |
| +  * @param in positioned at the start of the header. |
| +  * @param codec the expected codec name. |
| +  * @param maxVersion the newest version that is accepted. |
| +  * @return the version found in the header. |
| +  * @throws CorruptIndexException if any of the three checks fails. |
| +  * @throws IOException if reading fails. |
| +  */ |
| + public static int checkHeader(IndexInput in, String codec, int maxVersion) |
| +     throws IOException { |
| +   final int magic = in.readInt(); |
| +   if (magic != CODEC_MAGIC) { |
| +     throw new CorruptIndexException("codec header mismatch: actual header=" + magic + " vs expected header=" + CODEC_MAGIC); |
| +   } |
| + |
| +   final String name = in.readString(); |
| +   if (name.equals(codec) == false) { |
| +     throw new CorruptIndexException("codec mismatch: actual codec=" + name + " vs expected codec=" + codec); |
| +   } |
| + |
| +   final int version = in.readInt(); |
| +   if (version <= maxVersion) { |
| +     return version; |
| +   } |
| +   throw new CorruptIndexException("version " + version + " is too new (expected <= version " + maxVersion + ")"); |
| + } |
| +} |
| Index: src/java/org/apache/lucene/util/packed/PackedDirectLong.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedDirectLong.java Fri Feb 12 01:29:57 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedDirectLong.java Fri Feb 12 01:29:57 CET 2010 |
| @@ -0,0 +1,79 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Direct wrapping of 64 bit values to a backing array of longs. |
| + */ |
| +public class PackedDirectLong extends PackedInts.ReaderImpl |
| +    implements PackedInts.Mutable { |
| +  private static final int BITS_PER_VALUE = 64; |
| + |
| +  /* One long per value; assigned exactly once by every constructor */ |
| +  private final long[] blocks; |
| + |
| +  /** |
| +   * Creates an array of the given size with all values set to 0. |
| +   * @param valueCount the number of values. |
| +   */ |
| +  public PackedDirectLong(int valueCount) { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    blocks = new long[valueCount]; |
| +  } |
| + |
| +  /** |
| +   * Creates an array with content retrieved from the given IndexInput. |
| +   * @param in an IndexInput, positioned at the first value. |
| +   * @param valueCount the number of longs to read. |
| +   * @throws IOException if the values could not be read. |
| +   */ |
| +  public PackedDirectLong(IndexInput in, int valueCount) throws IOException { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    blocks = new long[valueCount]; |
| +    for (int i = 0; i < valueCount; i++) { |
| +      blocks[i] = in.readLong(); |
| +    } |
| +  } |
| + |
| +  /** |
| +   * Creates an array backed by the given blocks. |
| +   * <p> |
| +   * Note: The blocks are used directly, so changes to the given block will |
| +   * affect the structure. |
| +   * @param blocks used as the internal backing array. |
| +   */ |
| +  public PackedDirectLong(long[] blocks) { |
| +    super(blocks.length, BITS_PER_VALUE); |
| +    this.blocks = blocks; |
| +  } |
| + |
| +  public long get(final int index) { |
| +    return blocks[index]; |
| +  } |
| + |
| +  public void set(final int index, final long value) { |
| +    blocks[index] = value; |
| +  } |
| + |
| +  public long ramBytesUsed() { |
| +    // Multiply as long: an int product overflows for very large arrays |
| +    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + |
| +        (long) blocks.length * RamUsageEstimator.NUM_BYTES_LONG; |
| +  } |
| + |
| +  public void clear() { |
| +    Arrays.fill(blocks, 0L); |
| +  } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/packed/Packed32.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/Packed32.java Fri Feb 12 01:39:36 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/Packed32.java Fri Feb 12 01:39:36 CET 2010 |
| @@ -0,0 +1,218 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Space optimized random access capable array of values with a fixed number of |
| + * bits. The maximum number of bits/value is 31. Use {@link Packed64} for higher |
| + * numbers. |
| + * </p><p> |
| + * The implementation strives to avoid conditionals and expensive operations, |
| + * sacrificing code clarity to achieve better performance. |
| + */ |
| +public class Packed32 extends PackedInts.ReaderImpl |
| + implements PackedInts.Mutable { |
| + static final int BLOCK_SIZE = 32; // 32 = int, 64 = long |
| + static final int BLOCK_BITS = 5; // The #bits representing BLOCK_SIZE |
| + static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE |
| + |
| + private static final int ENTRY_SIZE = BLOCK_SIZE + 1; |
| + private static final int FAC_BITPOS = 3; |
| + |
| + /* |
| + * In order to make an efficient value-getter, conditionals should be |
| + * avoided. A value can be positioned inside of a block, requiring shifting |
| + * left or right or it can span two blocks, requiring a left-shift on the |
| + * first block and a right-shift on the second block. |
| + * |
| + * By always shifting the first block both left and right, we get exactly |
| + * the right bits. By always shifting the second block right and applying |
| + * a mask, we get the right bits there. After that, we | the two bitsets. |
| + * |
| + * Layout: SHIFTS[elementBits] holds FAC_BITPOS entries per bit position: |
| + * the left shift for the first block (bitPos), the right shift for the |
| + * first block (BLOCK_SIZE - elementBits) and the right shift for the second |
| + * block (0 when the value does not span two blocks). |
| + * MASKS[elementBits][bitPos] selects the bits taken from the second block |
| + * and is 0 when the value does not span two blocks. |
| + */ |
| + private static final int[][] SHIFTS = |
| + new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; |
| + private static final int[][] MASKS = new int[ENTRY_SIZE][ENTRY_SIZE]; |
| + |
| + static { // Generate the shifts and read masks for every bits/value and bit position |
| + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| + int[] currentShifts = SHIFTS[elementBits]; |
| + int base = bitPos * FAC_BITPOS; |
| + currentShifts[base ] = bitPos; |
| + currentShifts[base + 1] = BLOCK_SIZE - elementBits; |
| + if (bitPos <= BLOCK_SIZE - elementBits) { // Single block: nothing is taken from the second block |
| + currentShifts[base + 2] = 0; |
| + MASKS[elementBits][bitPos] = 0; |
| + } else { // Two blocks: the last rBits bits of the value sit at the top of the second block |
| + int rBits = elementBits - (BLOCK_SIZE - bitPos); |
| + currentShifts[base + 2] = BLOCK_SIZE - rBits; |
| + MASKS[elementBits][bitPos] = ~(~0 << rBits); |
| + } |
| + } |
| + } |
| + } |
| + |
| + /* |
| +  * The setter requires more masking than the getter. |
| +  * WRITE_MASKS[elementBits] holds FAC_BITPOS entries per bit position: |
| +  *   [base]   the bits to keep in the first block, |
| +  *   [base+1] the bits to keep in the second block, |
| +  *   [base+2] the bits of the shifted value to store in the second block. |
| +  */ |
| + private static final int[][] WRITE_MASKS = |
| +     new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; |
| + static { |
| +   for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| +     int elementPosMask = ~(~0 << elementBits); |
| +     int[] currentShifts = SHIFTS[elementBits]; |
| +     int[] currentMasks = WRITE_MASKS[elementBits]; |
| +     for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| +       int base = bitPos * FAC_BITPOS; |
| +       currentMasks[base] = ~((elementPosMask |
| +                               << currentShifts[base + 1]) |
| +                              >>> currentShifts[base]); |
| +       if (bitPos <= BLOCK_SIZE - elementBits) { // Second block not used |
| +         // Keep every bit of the second block. Deriving this mask from |
| +         // the shift (which is 0 here) would clear the lowest elementBits |
| +         // bits of the following block on each set() and thereby damage |
| +         // a value stored there. |
| +         currentMasks[base+1] = ~0; |
| +         currentMasks[base+2] = 0; // Nothing is or'ed into the second block |
| +       } else { // The value continues at the top of the second block |
| +         currentMasks[base+1] = ~(elementPosMask |
| +                                  << currentShifts[base + 2]); |
| +         currentMasks[base+2] = ~0; |
| +       } |
| +     } |
| +   } |
| + } |
| + |
| + /* The bits; get and set always touch blocks[elementPos+1] as well */ |
| + private int[] blocks; |
| + |
| + // Cached calculations, refreshed by updateCached |
| + private int maxPos; // blocks.length * BLOCK_SIZE / bitsPerValue - 2 |
| + private int[] shifts; // The shifts for the current bitsPerValue |
| + private int[] readMasks; |
| + private int[] writeMasks; |
| + |
| + /** |
| + * Creates an array with the internal structures adjusted for the given |
| + * limits and initialized to 0. |
| + * @param valueCount the number of elements. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * Note: bitsPerValue >32 is not supported by this implementation. |
| + */ |
| + public Packed32(int valueCount, int bitsPerValue) { |
| + this(new int[(int)(((long)valueCount) * bitsPerValue / BLOCK_SIZE + 2)], |
| + valueCount, bitsPerValue); |
| + } |
| + |
| + /** |
| + * Creates an array with content retrieved from the given IndexInput. |
| + * @param in an IndexInput, positioned at the start of Packed32-content. |
| + * @param valueCount the number of elements. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * @throws java.io.IOException if the values for the backing array could not |
| + * be retrieved. |
| + */ |
| + public Packed32(IndexInput in, int valueCount, int bitsPerValue) |
| + throws IOException { |
| + super(valueCount, bitsPerValue); |
| + int size = size(bitsPerValue, valueCount); |
| + blocks = new int[size+1]; // +1 as get and set always access blocks[elementPos+1] |
| + for(int i=0;i<size;i++) { |
| + blocks[i] = in.readInt(); |
| + } |
| + updateCached(); |
| + } |
| + |
| + // Number of 32 bit blocks needed for valueCount values of bitsPerValue bits, rounded up. |
| + private static int size(int bitsPerValue, int valueCount) { |
| +   final long bitCount = (long) valueCount * bitsPerValue; |
| +   long blockCount = bitCount / 32; |
| +   if (bitCount % 32 != 0) { |
| +     blockCount++; // a partially used last block |
| +   } |
| +   return (int) blockCount; |
| + } |
| + |
| + |
| + /** |
| + * Creates an array backed by the given blocks. |
| + * </p><p> |
| + * Note: The blocks are used directly, so changes to the given block will |
| + * affect the Packed32-structure. |
| + * @param blocks used as the internal backing array. |
| + * @param valueCount the number of values. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * Note: bitsPerValue >32 is not supported by this implementation. |
| + */ |
| + public Packed32(int[] blocks, int valueCount, int bitsPerValue) { |
| + // TODO: Check that blocks.length is sufficient for holding length values |
| + super(valueCount, bitsPerValue); |
| + if (bitsPerValue > 31) { |
| + throw new IllegalArgumentException(String.format( |
| + "This array only supports values of 31 bits or less. The " |
| + + "required number of bits was %d. The Packed64 " |
| + + "implementation allows values with more than 31 bits", |
| + bitsPerValue)); |
| + } |
| + this.blocks = blocks; |
| + updateCached(); |
| + } |
| + |
| + private void updateCached() { |
| + readMasks = MASKS[bitsPerValue]; |
| + maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2); |
| + shifts = SHIFTS[bitsPerValue]; |
| + writeMasks = WRITE_MASKS[bitsPerValue]; |
| + } |
| + |
| + /** |
| +  * @param index the position of the value. |
| +  * @return the value at the given index. |
| +  */ |
| + public long get(final int index) { |
| +   // Widen before multiplying: as an int product, index * bitsPerValue |
| +   // overflows once the value starts beyond bit 2^31-1. |
| +   final long majorBitPos = (long) index * bitsPerValue; |
| +   final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE |
| +   final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); |
| + |
| +   final int base = bitPos * FAC_BITPOS; |
| + |
| +   // First block: shift out the bits before the value, then right-align. |
| +   // Second block: its contribution is masked to 0 when the value does not |
| +   // span two blocks. |
| +   return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) | |
| +          ((blocks[elementPos+1] >>> shifts[base+2]) |
| +           & readMasks[bitPos]); |
| + } |
| + |
| + /** |
| +  * Stores the value at the given index. |
| +  * @param index where the value should be positioned. |
| +  * @param value the value to store; only its lowest 32 bits are used. |
| +  */ |
| + public void set(final int index, final long value) { |
| +   final int intValue = (int)value; |
| +   // Widen before multiplying: as an int product, index * bitsPerValue |
| +   // overflows once the value starts beyond bit 2^31-1. |
| +   final long majorBitPos = (long) index * bitsPerValue; |
| +   final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE |
| +   final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); |
| +   final int base = bitPos * FAC_BITPOS; |
| + |
| +   // Clear the target bits of each block with its keep-mask, then or in |
| +   // the part of the value that belongs to that block. |
| +   blocks[elementPos ] = (blocks[elementPos ] & writeMasks[base]) |
| +                         | (intValue << shifts[base + 1] >>> shifts[base]); |
| +   blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1]) |
| +                          | ((intValue << shifts[base + 2]) |
| +                             & writeMasks[base+2]); |
| + } |
| + |
| + public void clear() { |
| + Arrays.fill(blocks, 0); |
| + } |
| + |
| + public String toString() { |
| + return "Packed32(bitsPerValue=" + bitsPerValue + ", maxPos=" + maxPos |
| + + ", elements.length=" + blocks.length + ")"; |
| + } |
| + |
| + /** @return the array header size plus the size of all blocks, in bytes. */ |
| + public long ramBytesUsed() { |
| +   // Multiply as long: an int product overflows for very large arrays |
| +   return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER |
| +       + (long) blocks.length * RamUsageEstimator.NUM_BYTES_INT; |
| + } |
| +} |
| Index: src/java/org/apache/lucene/store/IndexInput.java |
| =================================================================== |
| --- src/java/org/apache/lucene/store/IndexInput.java (revision 895342) |
| +++ src/java/org/apache/lucene/store/IndexInput.java Fri Jan 22 12:58:35 CET 2010 |
| @@ -64,6 +64,13 @@ |
| readBytes(b, offset, len); |
| } |
| |
| + /** Reads two bytes and returns a short. The first byte read becomes the |
| + * high-order byte of the result. |
| + * NOTE(review): the see-tag below looks copied from readInt; presumably it |
| + * should name the IndexOutput method that writes a short - confirm. |
| + * @see IndexOutput#writeInt(int) |
| + */ |
| + public short readShort() throws IOException { |
| + return (short) (((readByte() & 0xFF) << 8) | (readByte() & 0xFF)); |
| + } |
| + |
| /** Reads four bytes and returns an int. |
| * @see IndexOutput#writeInt(int) |
| */ |
| Index: src/java/org/apache/lucene/util/packed/PackedAligned32.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedAligned32.java Fri Feb 12 02:19:44 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedAligned32.java Fri Feb 12 02:19:44 CET 2010 |
| @@ -0,0 +1,185 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Medium space and speed trade off. No value crosses a block boundary. |
| + * The maximum number of bits/value is 32. |
| + * Use {@link org.apache.lucene.util.packed.PackedAligned64} for higher numbers. |
| + * </p><p> |
| + * The implementation strives to avoid conditionals and expensive operations, |
| + * sacrificing code clarity to achieve better performance. |
| + * </p><p> |
| + * Space is optimally used within the boundaries of alignment, e.g. |
| + * 7 bits/value fits 4 values/block for 32 bit and 9 values/block for 64 bit. |
| + * Bits are packed left-aligned to be bit pattern compatible with other bit |
| + * array implementations where possible: the first value of a block occupies |
| + * the most significant bits, the next value the bits directly below, and so |
| + * on. This is the same placement that {@link PackedAlignedWriter#add} uses. |
| + */ |
| +public class PackedAligned32 extends PackedInts.ReaderImpl |
| +        implements PackedInts.Mutable { |
| +  static final int BLOCK_SIZE = 32; // 32 = int, 64 = long |
| + |
| +  private static final int ENTRY_SIZE = BLOCK_SIZE + 1; |
| + |
| +  /* |
| +   * A value is always positioned inside a single block, requiring a |
| +   * shift right to position the bits and a mask to extract them. |
| +   * SHIFTS[bitsPerValue][bitPos] is the shift for the value that starts |
| +   * bitPos bits below the top of the block. Only the positions where a |
| +   * whole value fits (bitPos <= BLOCK_SIZE - bitsPerValue) are filled. |
| +   */ |
| +  private static final int[][] SHIFTS = new int[ENTRY_SIZE][ENTRY_SIZE]; |
| + |
| +  /* READ_MASKS[bitsPerValue] has the lowest bitsPerValue bits set. */ |
| +  private static final int[] READ_MASKS = new int[ENTRY_SIZE]; |
| + |
| +  /* |
| +   * Setting a value requires clearing the destination bits with a mask, then |
| +   * shifting the value to the left and or'ing the two numbers. |
| +   * WRITE_MASKS[bitsPerValue][bitPos] has every bit set except the bits of |
| +   * the value at bitPos. |
| +   */ |
| +  private static final int[][] WRITE_MASKS = new int[ENTRY_SIZE][ENTRY_SIZE]; |
| + |
| +  static { // Generate shifts and masks |
| +    for (int elementBits = 1; elementBits <= BLOCK_SIZE; elementBits++) { |
| +      // Not ~(~0 << elementBits): int shift counts wrap at 32, which would |
| +      // give an empty mask for 32 bits/value. |
| +      final int valueMask = ~0 >>> (BLOCK_SIZE - elementBits); |
| +      READ_MASKS[elementBits] = valueMask; |
| +      for (int bitPos = 0; bitPos <= BLOCK_SIZE - elementBits; bitPos++) { |
| +        // Left-aligned: bitPos 0 is stored in the topmost bits. |
| +        final int shift = BLOCK_SIZE - bitPos - elementBits; |
| +        SHIFTS[elementBits][bitPos] = shift; |
| +        WRITE_MASKS[elementBits][bitPos] = ~(valueMask << shift); |
| +      } |
| +    } |
| +  } |
| + |
| +  /* The bits */ |
| +  private final int[] blocks; |
| + |
| +  /* Cached values */ |
| +  private int valuesPerBlock; |
| +  private int[] shifts; |
| +  private int readMask; |
| +  private int[] writeMasks; |
| + |
| +  /** |
| +   * Creates an array with the internal structures adjusted for the given |
| +   * limits and initialized to 0. |
| +   * @param valueCount   the number of values. |
| +   * @param bitsPerValue the number of bits available for any given value. |
| +   * @throws IllegalArgumentException if bitsPerValue is not in 1..32. |
| +   */ |
| +  public PackedAligned32(int valueCount, int bitsPerValue) { |
| +    super(valueCount, bitsPerValue); |
| +    checkBitsPerValue(bitsPerValue); |
| +    blocks = new int[size(valueCount, bitsPerValue)]; |
| +    updateCached(); |
| +  } |
| + |
| +  /** |
| +   * Creates an array with content retrieved from the given IndexInput. |
| +   * @param in           an IndexInput, positioned at the start of |
| +   *                     PackedAligned32-content. |
| +   * @param valueCount   the number of elements. |
| +   * @param bitsPerValue the number of bits available for any given value. |
| +   * @throws java.io.IOException if the values for the backing array could not |
| +   *                             be retrieved. |
| +   * @throws IllegalArgumentException if bitsPerValue is not in 1..32. |
| +   */ |
| +  public PackedAligned32(IndexInput in, int valueCount, int bitsPerValue) |
| +          throws IOException { |
| +    super(valueCount, bitsPerValue); |
| +    checkBitsPerValue(bitsPerValue); |
| +    blocks = new int[size(valueCount, bitsPerValue)]; |
| +    for (int i = 0; i < blocks.length; i++) { |
| +      blocks[i] = in.readInt(); |
| +    } |
| +    updateCached(); |
| +  } |
| + |
| +  /** |
| +   * Creates an array backed by the given blocks. |
| +   * </p><p> |
| +   * Note: The blocks are used directly, so changes to the given block will |
| +   * affect the PackedAligned32-structure. |
| +   * @param blocks       used as the internal backing array. |
| +   * @param valueCount   the number of values. |
| +   * @param bitsPerValue the number of bits available for any given value. |
| +   * @throws IllegalArgumentException if bitsPerValue is not in 1..32 or if |
| +   *         blocks is too short to hold valueCount values. |
| +   */ |
| +  public PackedAligned32(int[] blocks, int valueCount, int bitsPerValue) { |
| +    super(valueCount, bitsPerValue); |
| +    checkBitsPerValue(bitsPerValue); |
| +    final int required = blocksNeeded(valueCount, bitsPerValue); |
| +    if (blocks.length < required) { |
| +      throw new IllegalArgumentException("blocks.length=" + blocks.length |
| +              + " is too small, " + required + " blocks are needed for " |
| +              + valueCount + " values of " + bitsPerValue + " bits"); |
| +    } |
| +    this.blocks = blocks; |
| +    updateCached(); |
| +  } |
| + |
| +  /* Rejects bit counts that the shift and mask tables do not cover. */ |
| +  private static void checkBitsPerValue(int bitsPerValue) { |
| +    if (bitsPerValue > BLOCK_SIZE) { |
| +      throw new IllegalArgumentException(String.format( |
| +              "This array only supports values of 32 bits or less. The " |
| +              + "required number of bits was %d. The PackedAligned64 " |
| +              + "implementation allows values with more than 32 bits", |
| +              bitsPerValue)); |
| +    } |
| +    if (bitsPerValue < 1) { |
| +      throw new IllegalArgumentException( |
| +              "bitsPerValue must be at least 1 but was " + bitsPerValue); |
| +    } |
| +  } |
| + |
| +  /* The number of blocks that hold values: valueCount/valuesPerBlock, |
| +   * rounded up. */ |
| +  private static int blocksNeeded(int valueCount, int bitsPerValue) { |
| +    final int valuesPerBlock = BLOCK_SIZE / bitsPerValue; |
| +    final int fullBlocks = valueCount / valuesPerBlock; |
| +    return valueCount % valuesPerBlock == 0 ? fullBlocks : fullBlocks + 1; |
| +  } |
| + |
| +  /* |
| +   * The number of ints in the persistent form: the value blocks followed by |
| +   * two ints, matching the dummy long that PackedAlignedWriter.finish() |
| +   * appends after the value blocks. |
| +   */ |
| +  private static int size(int valueCount, int bitsPerValue) { |
| +    return blocksNeeded(valueCount, bitsPerValue) + 2; |
| +  } |
| + |
| +  /* Caches the table rows for the bitsPerValue of this instance. */ |
| +  private void updateCached() { |
| +    valuesPerBlock = BLOCK_SIZE / bitsPerValue; |
| +    shifts = SHIFTS[bitsPerValue]; |
| +    readMask = READ_MASKS[bitsPerValue]; |
| +    writeMasks = WRITE_MASKS[bitsPerValue]; |
| +  } |
| + |
| +  /** |
| +   * @param index the position of the value. |
| +   * @return the value at the given index, as a non-negative number. |
| +   */ |
| +  public long get(final int index) { |
| +    final int blockPos = index / valuesPerBlock; |
| +    final int bitPos = (index - (blockPos * valuesPerBlock)) * bitsPerValue; |
| + |
| +    // The long mask prevents sign extension when a 32 bit value has its |
| +    // highest bit set. |
| +    return ((blocks[blockPos] >>> shifts[bitPos]) & readMask) & 0xFFFFFFFFL; |
| +  } |
| + |
| +  /** |
| +   * Stores the lowest bitsPerValue bits of value at the given index. |
| +   * @param index the position of the value. |
| +   * @param value the value to store. |
| +   */ |
| +  public void set(final int index, final long value) { |
| +    // Masked so that an oversized value cannot spill into its neighbours |
| +    final int intValue = (int) value & readMask; |
| + |
| +    final int blockPos = index / valuesPerBlock; |
| +    final int bitPos = (index - (blockPos * valuesPerBlock)) * bitsPerValue; |
| + |
| +    blocks[blockPos] = (blocks[blockPos] & writeMasks[bitPos]) |
| +            | (intValue << shifts[bitPos]); |
| +  } |
| + |
| +  /** Sets all values to 0. */ |
| +  public void clear() { |
| +    Arrays.fill(blocks, 0); |
| +  } |
| + |
| +  /** |
| +   * @return the array header size plus NUM_BYTES_INT per block, computed as |
| +   *         a long to avoid int overflow for very large arrays. |
| +   */ |
| +  public long ramBytesUsed() { |
| +    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER |
| +            + (long) blocks.length * RamUsageEstimator.NUM_BYTES_INT; |
| +  } |
| + |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/RamUsageEstimator.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/RamUsageEstimator.java (revision 901710) |
| +++ src/java/org/apache/lucene/util/RamUsageEstimator.java Fri Jan 22 13:01:30 CET 2010 |
| @@ -35,6 +35,16 @@ |
| * estimate is complete. |
| */ |
| public final class RamUsageEstimator { |
| + |
| + // Size estimates in bytes. Declared final, like the declarations they |
| + // replace, so that no caller can reassign them. |
| + public final static int NUM_BYTES_SHORT = 2; |
| + public final static int NUM_BYTES_INT = 4; |
| + public final static int NUM_BYTES_LONG = 8; |
| + public final static int NUM_BYTES_FLOAT = 4; |
| + public final static int NUM_BYTES_DOUBLE = 8; |
| + public final static int NUM_BYTES_OBJ_HEADER = 8; |
| + public final static int NUM_BYTES_OBJ_REF = Constants.JRE_IS_64BIT ? 8 : 4; |
| + // Array header estimate: object header + an int + one object reference |
| + public final static int NUM_BYTES_ARRAY_HEADER = NUM_BYTES_OBJ_HEADER + NUM_BYTES_INT + NUM_BYTES_OBJ_REF; |
| + |
| private MemoryModel memoryModel; |
| |
| private final Map<Object,Object> seen; |
| @@ -45,11 +55,6 @@ |
| |
| public final static int NUM_BYTES_OBJECT_REF = Constants.JRE_IS_64BIT ? 8 : 4; |
| public final static int NUM_BYTES_CHAR = 2; |
| - public final static int NUM_BYTES_SHORT = 2; |
| - public final static int NUM_BYTES_INT = 4; |
| - public final static int NUM_BYTES_LONG = 8; |
| - public final static int NUM_BYTES_FLOAT = 4; |
| - public final static int NUM_BYTES_DOUBLE = 8; |
| |
| private boolean checkInterned; |
| |
| Index: src/java/org/apache/lucene/util/ConsumesRAM.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/ConsumesRAM.java Fri Jan 22 12:58:35 CET 2010 |
| +++ src/java/org/apache/lucene/util/ConsumesRAM.java Fri Jan 22 12:58:35 CET 2010 |
| @@ -0,0 +1,22 @@ |
| +package org.apache.lucene.util; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Implemented by structures that can report how much RAM they occupy. |
| + */ |
| +public interface ConsumesRAM { |
| +  /** |
| +   * @return the amount of RAM used by this object, in bytes. |
| +   */ |
| +  long ramBytesUsed(); |
| +} |
| Index: src/java/org/apache/lucene/util/packed/PackedDirectInt.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedDirectInt.java Fri Feb 12 01:29:57 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedDirectInt.java Fri Feb 12 01:29:57 CET 2010 |
| @@ -0,0 +1,82 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Direct wrapping of 32 bit values to a backing array of ints. |
| + */ |
| +public class PackedDirectInt extends PackedInts.ReaderImpl |
| +        implements PackedInts.Mutable { |
| +  private static final int BITS_PER_VALUE = 32; |
| + |
| +  private final int[] blocks; |
| + |
| +  /** |
| +   * Creates an array of valueCount values, all initialized to 0. |
| +   * @param valueCount the number of values. |
| +   */ |
| +  public PackedDirectInt(int valueCount) { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    blocks = new int[valueCount]; |
| +  } |
| + |
| +  /** |
| +   * Creates an array with valueCount ints read from the given IndexInput. |
| +   * If valueCount is odd, one extra int is read and discarded so that a |
| +   * whole number of longs is consumed. |
| +   * @param in         the input to read the values from. |
| +   * @param valueCount the number of values. |
| +   * @throws IOException if the values could not be read. |
| +   */ |
| +  public PackedDirectInt(IndexInput in, int valueCount) throws IOException { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    blocks = new int[valueCount]; |
| +    for (int i = 0; i < valueCount; i++) { |
| +      blocks[i] = in.readInt(); |
| +    } |
| +    if (valueCount % 2 != 0) { |
| +      in.readInt(); // round out long |
| +    } |
| +  } |
| + |
| +  /** |
| +   * Creates an array backed by the given blocks. |
| +   * </p><p> |
| +   * Note: The blocks are used directly, so changes to the given block will |
| +   * affect the structure. |
| +   * @param blocks used as the internal backing array. |
| +   */ |
| +  public PackedDirectInt(int[] blocks) { |
| +    super(blocks.length, BITS_PER_VALUE); |
| +    this.blocks = blocks; |
| +  } |
| + |
| +  /** |
| +   * @param index the position of the value. |
| +   * @return the value at the given index as an unsigned 32 bit number. |
| +   */ |
| +  public long get(final int index) { |
| +    return 0xFFFFFFFFL & blocks[index]; |
| +  } |
| + |
| +  /** |
| +   * Stores the lowest 32 bits of value at the given index. |
| +   * @param index the position of the value. |
| +   * @param value the value to store. |
| +   */ |
| +  public void set(final int index, final long value) { |
| +    // The mask needs the L suffix: 0xFFFFFFFF is the int -1, which widens |
| +    // to a long with all 64 bits set and therefore masks nothing. |
| +    blocks[index] = (int) (value & 0xFFFFFFFFL); |
| +  } |
| + |
| +  /** |
| +   * @return the array header size plus NUM_BYTES_INT per value, computed as |
| +   *         a long to avoid int overflow for very large arrays. |
| +   */ |
| +  public long ramBytesUsed() { |
| +    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER |
| +            + (long) blocks.length * RamUsageEstimator.NUM_BYTES_INT; |
| +  } |
| + |
| +  /** Sets all values to 0. */ |
| +  public void clear() { |
| +    Arrays.fill(blocks, 0); |
| +  } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/packed/TODO |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/TODO Fri Feb 12 03:23:08 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/TODO Fri Feb 12 03:23:08 CET 2010 |
| @@ -0,0 +1,103 @@ |
| +- Make a better check for persistence validity |
| +All types of writers and readers need to be checked |
| + |
| +- Make PackedAligned64 persistence work |
| +There's a bug somewhere. The same bug will probably also be in PackedAligned32. |
| + |
| +- Test whether aligned is always faster than packed |
| +Aligned uses more logic (mainly a division), but packed requests two ints/longs |
| +from RAM for each request. Maybe we can avoid using aligned at all? |
| +TestPackedInts.testSpeed() does performance testing. This should be done on |
| +different hardware. |
| + |
| +- Ensure that writers align to 64 bit and that they append an empty long |
| +The extra long is needed by the packed-implementation in order to avoid |
| +conditionals. Right now it could be spared as we always load into memory and |
| +could just allocate an extra long, but a switch to mem-mapping or such would |
| +require the persistent format to contain the long. |
| + |
| + - Determine whether it should be possible to request mutable arrays or not |
| +Keeping it possible to have mutable arrays opens up for re-use of the |
| +implementations, but it also makes the implementations heavier to maintain |
| +due to the added set-method. |
| + |
| +- Determine how to request a Writer |
| +Using an aligned64-structure on a 32 bit machine comes with a heavy |
| +speed-penalty, due to the Reader having to request a long for each get. |
| +The overhead is so large that it would be better to use packed (backed by int) |
| +instead. This means - unfortunately - that the Writer needs to guess the |
| +architecture that the Reader will be used on. |
| +Using a factory can help choosing the implementation: Requesting an aligned |
| +structure with 7 bits/value is a bad idea as the faster directByte takes the |
| +same amount of space. On a 32 bit machine that is. In order for the factory |
| +to auto-promote selections, it needs to know whether 32 bit or 64 bit is |
| +preferable. |
| + |
| +- JavaDocs |
| + |
| + |
| + |
| +******************************************************************************** |
| +testSpeed result by Toke Eskildsen (te@statsbiblioteket.dk) 2010-02-12 |
| +Java 1.6.0_15-b03, default settings |
| +Dell Precision M6500: Intel i7 Q 820 @ 1.73GHz, 8 MB level 2 cache, |
| + dual-channel PC 1333 RAM |
| +******************************************************************************** |
| + |
| + bitsPerValue valueCount getCount PackedDirectByte PackedDirectShort Packed32 PackedAligned32 PackedDirectInt Packed64 PackedAligned64 PackedDirectLong |
| + 1 1000 10000000 167 141 258 242 172 264 242 183 |
| + 1 1000000 10000000 224 232 266 233 246 262 238 338 |
| + 1 10000000 10000000 359 469 280 278 508 278 272 551 |
| + 3 1000 10000000 168 166 265 241 163 262 243 166 |
| + 3 1000000 10000000 227 226 261 251 239 274 249 330 |
| + 3 10000000 10000000 406 476 301 304 522 300 308 547 |
| + 4 1000 10000000 167 168 266 239 164 285 239 169 |
| + 4 1000000 10000000 228 231 294 274 262 291 269 314 |
| + 4 10000000 10000000 385 480 308 333 514 331 315 557 |
| + 7 1000 10000000 172 174 278 248 162 271 238 177 |
| + 7 1000000 10000000 224 236 289 281 272 278 277 345 |
| + 7 10000000 10000000 405 473 389 447 516 399 402 553 |
| + 8 1000 10000000 192 171 268 242 174 291 240 163 |
| + 8 1000000 10000000 226 232 291 284 286 274 265 314 |
| + 8 10000000 10000000 381 467 406 428 512 422 419 580 |
| + |
| + bitsPerValue valueCount getCount PackedDirectShort Packed32 PackedAligned32 PackedDirectInt Packed64 PackedAligned64 PackedDirectLong |
| + 9 1000 10000000 166 274 241 170 261 237 163 |
| + 9 1000000 10000000 229 299 273 250 284 275 327 |
| + 9 10000000 10000000 483 443 477 519 438 455 568 |
| + 15 1000 10000000 170 265 239 174 264 235 162 |
| + 15 1000000 10000000 232 285 274 240 278 269 339 |
| + 15 10000000 10000000 473 518 524 523 519 521 550 |
| + 16 1000 10000000 166 263 236 172 264 235 160 |
| + 16 1000000 10000000 229 285 278 244 293 272 332 |
| + 16 10000000 10000000 470 513 517 509 534 529 548 |
| + |
| + bitsPerValue valueCount getCount Packed32 PackedAligned32 PackedDirectInt Packed64 PackedAligned64 PackedDirectLong |
| + 17 1000 10000000 262 255 177 260 234 160 |
| + 17 1000000 10000000 290 306 273 304 290 320 |
| + 17 10000000 10000000 532 572 533 529 556 551 |
| + 28 1000 10000000 269 256 187 267 238 163 |
| + 28 1000000 10000000 293 295 253 293 296 312 |
| + 28 10000000 10000000 542 567 501 548 567 542 |
| + 31 1000 10000000 260 235 177 266 232 158 |
| + 31 1000000 10000000 292 294 244 296 297 328 |
| + 31 10000000 10000000 552 563 516 562 568 548 |
| + |
| + bitsPerValue valueCount getCount PackedDirectInt Packed64 PackedAligned64 PackedDirectLong |
| + 32 1000 10000000 172 263 241 166 |
| + 32 1000000 10000000 241 291 297 320 |
| + 32 10000000 10000000 519 556 573 546 |
| + |
| + bitsPerValue valueCount getCount Packed64 PackedAligned64 PackedDirectLong |
| + 33 1000 10000000 264 239 159 |
| + 33 1000000 10000000 293 374 319 |
| + 33 10000000 10000000 559 595 552 |
| + 47 1000 10000000 264 242 164 |
| + 47 1000000 10000000 319 369 322 |
| + 47 10000000 10000000 577 601 548 |
| + 49 1000 10000000 261 243 162 |
| + 49 1000000 10000000 323 413 319 |
| + 49 10000000 10000000 584 610 551 |
| + 63 1000 10000000 269 235 161 |
| + 63 1000000 10000000 396 369 313 |
| + 63 10000000 10000000 592 596 559 |
| Index: src/java/org/apache/lucene/util/packed/PackedDirectShort.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedDirectShort.java Fri Feb 12 01:29:57 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedDirectShort.java Fri Feb 12 01:29:57 CET 2010 |
| @@ -0,0 +1,86 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Direct wrapping of 16 bit values to a backing array of shorts. |
| + */ |
| +public class PackedDirectShort extends PackedInts.ReaderImpl |
| +        implements PackedInts.Mutable { |
| +  private static final int BITS_PER_VALUE = 16; |
| +  // Number of shorts that make up one long in the persistent form |
| +  private static final int VALUES_PER_LONG = 4; |
| + |
| +  private final short[] blocks; |
| + |
| +  /** |
| +   * Creates an array of valueCount values, all initialized to 0. |
| +   * @param valueCount the number of values. |
| +   */ |
| +  public PackedDirectShort(int valueCount) { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    blocks = new short[valueCount]; |
| +  } |
| + |
| +  /** |
| +   * Creates an array with valueCount shorts read from the given IndexInput. |
| +   * If valueCount is not a multiple of 4, the remaining shorts of the last |
| +   * long are read and discarded. |
| +   * @param in         the input to read the values from. |
| +   * @param valueCount the number of values. |
| +   * @throws IOException if the values could not be read. |
| +   */ |
| +  public PackedDirectShort(IndexInput in, int valueCount) throws IOException { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    blocks = new short[valueCount]; |
| +    for (int i = 0; i < valueCount; i++) { |
| +      blocks[i] = in.readShort(); |
| +    } |
| +    final int remainder = valueCount % VALUES_PER_LONG; |
| +    if (remainder != 0) { |
| +      // round out long |
| +      for (int i = remainder; i < VALUES_PER_LONG; i++) { |
| +        in.readShort(); |
| +      } |
| +    } |
| +  } |
| + |
| +  /** |
| +   * Creates an array backed by the given blocks. |
| +   * </p><p> |
| +   * Note: The blocks are used directly, so changes to the given block will |
| +   * affect the structure. |
| +   * @param blocks used as the internal backing array. |
| +   */ |
| +  public PackedDirectShort(short[] blocks) { |
| +    super(blocks.length, BITS_PER_VALUE); |
| +    this.blocks = blocks; |
| +  } |
| + |
| +  /** |
| +   * @param index the position of the value. |
| +   * @return the value at the given index as an unsigned 16 bit number. |
| +   */ |
| +  public long get(final int index) { |
| +    return 0xFFFFL & blocks[index]; |
| +  } |
| + |
| +  /** |
| +   * Stores the lowest 16 bits of value at the given index. |
| +   * @param index the position of the value. |
| +   * @param value the value to store. |
| +   */ |
| +  public void set(final int index, final long value) { |
| +    blocks[index] = (short) (value & 0xFFFF); |
| +  } |
| + |
| +  /** |
| +   * @return the array header size plus NUM_BYTES_SHORT per value, computed |
| +   *         as a long to avoid int overflow for very large arrays. |
| +   */ |
| +  public long ramBytesUsed() { |
| +    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER |
| +            + (long) blocks.length * RamUsageEstimator.NUM_BYTES_SHORT; |
| +  } |
| + |
| +  /** Sets all values to 0. */ |
| +  public void clear() { |
| +    Arrays.fill(blocks, (short) 0); |
| +  } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/packed/PackedAlignedWriter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedAlignedWriter.java Fri Feb 12 02:59:59 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedAlignedWriter.java Fri Feb 12 02:59:59 CET 2010 |
| @@ -0,0 +1,95 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.store.IndexOutput; |
| + |
| +import java.io.IOException; |
| + |
| +// Packs high order byte first, to match |
| +// IndexOutput.writeInt/Long/Short byte order |
| + |
| +/** |
| + * Generic writer for block-aligned values: Bits for values are stored so |
| + * that block-boundaries are never crossed. For some number of bits, this means |
| + * wasted space in the blocks. |
| + * </p><p> |
| + * The bits for values are stored left-aligned in the blocks, in order to be |
| + * bit-pattern compatible with byte, short, int and long-backed implementations |
| + * as well as packed for 1, 2, 4, 8, 16, 32 and 64 bits/value. |
| + */ |
| +public class PackedAlignedWriter extends PackedInts.Writer { |
| +  private final PackedInts.BLOCK_PREFERENCE blockPref; |
| +  // Bits of the block being filled; the block occupies the lowest |
| +  // blockPref.getBits() bits of this long |
| +  private long pending = 0; |
| +  // Number of bits already used in the pending block, counted from its top |
| +  private int pendingBitPos = 0; |
| +  // Number of values received through add |
| +  private int written = 0; |
| + |
| +  /** |
| +   * Creates a writer that uses aligned32 persistence if blockPref is bit32 |
| +   * and aligned64 persistence otherwise. |
| +   * @param out          the destination for the blocks. |
| +   * @param valueCount   the number of values that will be added. |
| +   * @param bitsPerValue the number of bits used for each value. |
| +   * @param blockPref    the block size to align to. |
| +   * @throws IOException propagated from the superclass constructor. |
| +   */ |
| +  public PackedAlignedWriter(IndexOutput out, int valueCount, |
| +          int bitsPerValue, PackedInts.BLOCK_PREFERENCE blockPref) |
| +          throws IOException { |
| +    super(out, valueCount, bitsPerValue, |
| +            blockPref == PackedInts.BLOCK_PREFERENCE.bit32 ? |
| +                    PackedInts.PERSISTENCE.aligned32 : |
| +                    PackedInts.PERSISTENCE.aligned64); |
| +    this.blockPref = blockPref; |
| +  } |
| + |
| +  /** |
| +   * Adds a value below the values already in the current block and writes |
| +   * the block as soon as no further value fits into it. |
| +   * @param value the value to add, between 0 and the maximum for |
| +   *              bitsPerValue. |
| +   * @throws IOException if a full block could not be written. |
| +   */ |
| +  @Override |
| +  public void add(long value) throws IOException { |
| +    // TODO: Consider caching maxValue and bits/block |
| +    assert value <= PackedInts.maxValue(bitsPerValue) : "value=" + value |
| +            + " maxValue=" + PackedInts.maxValue(bitsPerValue); |
| +    assert value >= 0; |
| +    // Checked before written is incremented, so it has to be a strict |
| +    // comparison to reject value number valueCount+1. |
| +    assert written < valueCount : "The number of values to write has been " + |
| +            "exceeded, expected number of values: " + valueCount; |
| +    final int blockBits = blockPref.getBits(); |
| +    pending |= value << (blockBits - pendingBitPos - bitsPerValue); |
| +    pendingBitPos += bitsPerValue; |
| +    if (pendingBitPos > blockBits - bitsPerValue) { |
| +      flush(); |
| +    } |
| +    written++; |
| +  } |
| + |
| +  /** |
| +   * Writes the last, partially filled block if there is one, followed by |
| +   * a dummy long. |
| +   * @throws IOException if the data could not be written. |
| +   */ |
| +  @Override |
| +  public void finish() throws IOException { |
| +    assert written == valueCount : |
| +            valueCount + " values should be added, but only " + written |
| +                    + " has been received"; |
| +    if (pendingBitPos != 0) { |
| +      flush(); |
| +    } |
| +    out.writeLong(0L); // Dummy last element to be compatible with packed |
| +  } |
| + |
| +  /* Writes the pending block in the preferred block size and resets it. */ |
| +  private void flush() throws IOException { |
| +    // TODO: Align to 64 bit |
| +    switch (blockPref) { |
| +      case bit32: { |
| +        // add() shifts by at most getBits() - bitsPerValue, so for 32 bit |
| +        // blocks the values are in the low 32 bits of pending. |
| +        // NOTE(review): assumes bit32.getBits() == 32 -- confirm. |
| +        out.writeInt((int) pending); |
| +        break; |
| +      } |
| +      case bit64: { |
| +        out.writeLong(pending); |
| +        break; |
| +      } |
| +    } |
| +    pending = 0; |
| +    pendingBitPos = 0; |
| +  } |
| + |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/packed/PackedDirectByte.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedDirectByte.java Fri Feb 12 02:25:36 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedDirectByte.java Fri Feb 12 02:25:36 CET 2010 |
| @@ -0,0 +1,86 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Direct wrapping of 8 bit values to a backing array of bytes. |
| + */ |
| +public class PackedDirectByte extends PackedInts.ReaderImpl |
| +        implements PackedInts.Mutable { |
| +  private static final int BITS_PER_VALUE = 8; |
| +  // Number of bytes that make up one long in the persistent form |
| +  private static final int VALUES_PER_LONG = 8; |
| + |
| +  private final byte[] blocks; |
| + |
| +  /** |
| +   * Creates an array of valueCount values, all initialized to 0. |
| +   * @param valueCount the number of values. |
| +   */ |
| +  public PackedDirectByte(int valueCount) { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    blocks = new byte[valueCount]; |
| +  } |
| + |
| +  /** |
| +   * Creates an array with valueCount bytes read from the given IndexInput. |
| +   * If valueCount is not a multiple of 8, the remaining bytes of the last |
| +   * long are read and discarded. |
| +   * @param in         the input to read the values from. |
| +   * @param valueCount the number of values. |
| +   * @throws IOException if the values could not be read. |
| +   */ |
| +  public PackedDirectByte(IndexInput in, int valueCount) |
| +          throws IOException { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    blocks = new byte[valueCount]; |
| +    // One bulk read instead of one readByte call per value |
| +    in.readBytes(blocks, 0, valueCount); |
| +    final int remainder = valueCount % VALUES_PER_LONG; |
| +    if (remainder != 0) { |
| +      // round out long |
| +      for (int i = remainder; i < VALUES_PER_LONG; i++) { |
| +        in.readByte(); |
| +      } |
| +    } |
| +  } |
| + |
| +  /** |
| +   * Creates an array backed by the given blocks. |
| +   * </p><p> |
| +   * Note: The blocks are used directly, so changes to the given block will |
| +   * affect the structure. |
| +   * @param blocks used as the internal backing array. |
| +   */ |
| +  public PackedDirectByte(byte[] blocks) { |
| +    super(blocks.length, BITS_PER_VALUE); |
| +    this.blocks = blocks; |
| +  } |
| + |
| +  /** |
| +   * @param index the position of the value. |
| +   * @return the value at the given index as an unsigned 8 bit number. |
| +   */ |
| +  public long get(final int index) { |
| +    return 0xFFL & blocks[index]; |
| +  } |
| + |
| +  /** |
| +   * Stores the lowest 8 bits of value at the given index. |
| +   * @param index the position of the value. |
| +   * @param value the value to store. |
| +   */ |
| +  public void set(final int index, final long value) { |
| +    blocks[index] = (byte) (value & 0xFF); |
| +  } |
| + |
| +  /** |
| +   * @return the array header size plus one byte per value, computed as a |
| +   *         long to avoid int overflow for very large arrays. |
| +   */ |
| +  public long ramBytesUsed() { |
| +    return (long) RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + blocks.length; |
| +  } |
| + |
| +  /** Sets all values to 0. */ |
| +  public void clear() { |
| +    Arrays.fill(blocks, (byte) 0); |
| +  } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/packed/PackedWriter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedWriter.java Fri Feb 12 00:37:49 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedWriter.java Fri Feb 12 00:37:49 CET 2010 |
| @@ -0,0 +1,105 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.store.IndexOutput; |
| + |
| +import java.io.IOException; |
| + |
| +// Packs high order byte first, to match |
| +// IndexOutput.writeInt/Long/Short byte order |
| + |
| +/** |
| + * Generic writer for space-optimal packed values. The resulting bits can be |
| + * used directly by Packed32, Packed64 and PackedDirect* and will always be |
| + * long-aligned. |
| + */ |
| +public class PackedWriter extends PackedInts.Writer { |
| +  // The long currently being filled, from its highest bits downwards |
| +  private long pending; |
| +  // Number of unused low-order bits left in pending (64 = nothing pending) |
| +  private int pendingBitPos; |
| + |
| +  // masks[n-1] masks for bottom n bits |
| +  private final long[] masks; |
| + |
| +  // nocommit -- allow minValue too?  ie not just minValue==0 |
| + |
| +  /** |
| +   * Creates a writer using packed persistence. |
| +   * @param out          the destination for the packed longs. |
| +   * @param valueCount   the number of values that will be added. |
| +   * @param bitsPerValue the number of bits used for each value. |
| +   * @throws IOException propagated from the superclass constructor. |
| +   */ |
| +  public PackedWriter(IndexOutput out, int valueCount, int bitsPerValue) |
| +          throws IOException { |
| + |
| +    super(out, valueCount, bitsPerValue, PackedInts.PERSISTENCE.packed); |
| + |
| +    pendingBitPos = 64; |
| +    masks = new long[bitsPerValue - 1]; |
| + |
| +    // Built with long arithmetic: an int accumulator overflows once more |
| +    // than 31 bits are masked, which gave wrong masks for bitsPerValue > 32. |
| +    for (int i = 0; i < masks.length; i++) { |
| +      masks[i] = (1L << (i + 1)) - 1; |
| +    } |
| +  } |
| + |
| +  /** |
| +   * Appends a value to the packed stream. A long is written to the output |
| +   * every time 64 bits have been collected. |
| +   * Do not call this after finish |
| +   * @param v the value to add, between 0 and the maximum for bitsPerValue. |
| +   * @throws IOException if a full long could not be written. |
| +   */ |
| +  @Override |
| +  public void add(long v) throws IOException { |
| +    assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v |
| +            + " maxValue=" + PackedInts.maxValue(bitsPerValue); |
| +    assert v >= 0; |
| + |
| +    if (pendingBitPos >= bitsPerValue) { |
| +      // not split |
| + |
| +      // write-once, so we can |= w/o first masking to 0s |
| +      pending |= v << (pendingBitPos - bitsPerValue); |
| +      if (pendingBitPos == bitsPerValue) { |
| +        // flush |
| +        out.writeLong(pending); |
| +        pending = 0; |
| +        pendingBitPos = 64; |
| +      } else { |
| +        pendingBitPos -= bitsPerValue; |
| +      } |
| + |
| +    } else { |
| +      // split |
| + |
| +      // write top pendingBitPos bits of value into bottom bits of pending |
| +      pending |= (v >> (bitsPerValue - pendingBitPos)) & masks[pendingBitPos - 1]; |
| + |
| +      // flush |
| +      out.writeLong(pending); |
| + |
| +      // write bottom (bitsPerValue - pendingBitPos) bits of value into top bits of pending |
| +      pendingBitPos = 64 - bitsPerValue + pendingBitPos; |
| +      pending = (v << pendingBitPos); |
| +    } |
| +  } |
| + |
| +  /** |
| +   * Writes the partially filled long, if any, followed by a dummy long. |
| +   * @throws IOException if the data could not be written. |
| +   */ |
| +  @Override |
| +  public void finish() throws IOException { |
| +    if (pendingBitPos != 64) { |
| +      out.writeLong(pending); |
| +    } |
| +    out.writeLong(0L); // Dummy to compensate for not using conditionals |
| +  } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/BytesRef.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/BytesRef.java Fri Jan 22 12:58:35 CET 2010 |
| +++ src/java/org/apache/lucene/util/BytesRef.java Fri Jan 22 12:58:35 CET 2010 |
| @@ -0,0 +1,170 @@ |
| +package org.apache.lucene.util; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.UnsupportedEncodingException; |
| + |
| +// nocommit -- share w/ flex's TermRef |
| +/** |
| + * A reference to a slice of a byte array: the length bytes of bytes that |
| + * start at index offset. |
| + */ |
| +public class BytesRef { |
| + |
| +  public byte[] bytes; |
| +  public int offset; |
| +  public int length; |
| + |
| +  /** Defines an ordering of two BytesRef instances. */ |
| +  public abstract static class Comparator { |
| +    abstract public int compare(BytesRef a, BytesRef b); |
| +  } |
| + |
| +  /** Creates a ref whose fields all keep their default values. */ |
| +  public BytesRef() { |
| +  } |
| + |
| +  /** Creates bytes ref, wrapping UTF8 bytes from the |
| +   *  provided string. */ |
| +  public BytesRef(String s) { |
| +    final byte[] encoded; |
| +    try { |
| +      encoded = s.getBytes("UTF-8"); |
| +    } catch (UnsupportedEncodingException uee) { |
| +      throw new RuntimeException(uee); |
| +    } |
| +    bytes = encoded; |
| +    offset = 0; |
| +    length = encoded.length; |
| +  } |
| + |
| +  /** |
| +   * Creates a ref holding a private copy of the referenced bytes of other. |
| +   * The copy starts at offset 0. |
| +   */ |
| +  public BytesRef(BytesRef other) { |
| +    final int count = other.length; |
| +    final byte[] copy = new byte[count]; |
| +    System.arraycopy(other.bytes, other.offset, copy, 0, count); |
| +    bytes = copy; |
| +    offset = 0; |
| +    length = count; |
| +  } |
| + |
| +  /** |
| +   * Returns true if both refs have the same length and reference the same |
| +   * byte values; the offsets may differ. |
| +   */ |
| +  public boolean bytesEquals(BytesRef other) { |
| +    if (length != other.length) { |
| +      return false; |
| +    } |
| +    final byte[] theirs = other.bytes; |
| +    final int theirStart = other.offset; |
| +    for (int i = 0; i < length; i++) { |
| +      if (bytes[offset + i] != theirs[theirStart + i]) { |
| +        return false; |
| +      } |
| +    } |
| +    return true; |
| +  } |
| + |
| +  /** Decodes the referenced bytes as UTF-8 and returns the resulting String. */ |
| +  public String utf8ToString() { |
| +    final String decoded; |
| +    try { |
| +      decoded = new String(bytes, offset, length, "UTF8"); |
| +    } catch (java.io.UnsupportedEncodingException uee) { |
| +      throw new RuntimeException(uee); |
| +    } |
| +    return decoded; |
| +  } |
| + |
| +  private final static Comparator straightComparator = new StraightComparator(); |
| + |
| +  /** Returns the shared StraightComparator instance. */ |
| +  public static Comparator getStraightComparator() { |
| +    return straightComparator; |
| +  } |
| + |
| +  /** |
| +   * Compares byte by byte using the plain (signed) byte values; if one ref is |
| +   * a prefix of the other, the shorter one sorts first. |
| +   */ |
| +  // NOTE(review): bytes >= 0x80 are negative here and therefore sort before |
| +  // 0x00..0x7f -- confirm that this signed order is intended. |
| +  public static class StraightComparator extends Comparator { |
| +    public int compare(BytesRef a, BytesRef b) { |
| +      final int common = (a.length <= b.length) ? a.length : b.length; |
| +      final byte[] aBytes = a.bytes; |
| +      final byte[] bBytes = b.bytes; |
| +      int bPos = b.offset; |
| +      for (int aPos = a.offset, aEnd = aPos + common; aPos < aEnd; aPos++, bPos++) { |
| +        final int diff = aBytes[aPos] - bBytes[bPos]; |
| +        if (diff != 0) { |
| +          return diff; |
| +        } |
| +      } |
| +      return a.length - b.length; |
| +    } |
| +  } |
| + |
| +  private final static Comparator utf8SortedAsUTF16SortOrder = new UTF8SortedAsUTF16Comparator(); |
| + |
| +  /** Returns the shared UTF8SortedAsUTF16Comparator instance. */ |
| +  public static Comparator getUTF8SortedAsUTF16Comparator() { |
| +    return utf8SortedAsUTF16SortOrder; |
| +  } |
| + |
| +  /** |
| +   * Compares the bytes as unsigned values, adjusting the first differing |
| +   * bytes so that the result matches UTF16's sort order. |
| +   */ |
| +  public static class UTF8SortedAsUTF16Comparator extends Comparator { |
| + |
| +    // Moves the lead bytes 0xee and 0xef above the remaining lead bytes |
| +    private static int fixup(int unsignedByte) { |
| +      return ((unsignedByte & 0xfe) == 0xee) ? unsignedByte + 0x10 : unsignedByte; |
| +    } |
| + |
| +    public int compare(BytesRef a, BytesRef b) { |
| +      final byte[] aBytes = a.bytes; |
| +      final byte[] bBytes = b.bytes; |
| +      final int common = (a.length < b.length) ? a.length : b.length; |
| + |
| +      int bPos = b.offset; |
| +      for (int aPos = a.offset, aEnd = aPos + common; aPos < aEnd; aPos++, bPos++) { |
| +        final int aByte = aBytes[aPos] & 0xff; |
| +        final int bByte = bBytes[bPos] & 0xff; |
| +        if (aByte == bByte) { |
| +          continue; |
| +        } |
| +        // See http://icu-project.org/docs/papers/utf16_code_point_order.html#utf-8-in-utf-16-order |
| +        // The refs differ at this position; the bytes only need the fixup |
| +        // when both of them are 0xee or above: |
| +        if (aByte >= 0xee && bByte >= 0xee) { |
| +          return fixup(aByte) - fixup(bByte); |
| +        } |
| +        return aByte - bByte; |
| +      } |
| + |
| +      // One is a prefix of the other, or, they are equal: |
| +      return a.length - b.length; |
| +    } |
| +  } |
| + |
| +  // nocommit -- kinda hackish? needed only (so far) for FieldComparator |
| +  /** Pairs a ref with a Comparator so it can be used as a Comparable. */ |
| +  private static class ComparableBytesRef implements Comparable { |
| +    private final BytesRef ref; |
| +    private final Comparator order; |
| + |
| +    public ComparableBytesRef(BytesRef b, Comparator c) { |
| +      ref = b; |
| +      order = c; |
| +    } |
| + |
| +    // The argument must be another ComparableBytesRef; the comparator of |
| +    // this instance decides the order. |
| +    public int compareTo(Object other) { |
| +      return order.compare(ref, ((ComparableBytesRef) other).ref); |
| +    } |
| +  } |
| + |
| +  /** Wraps b so that it is compared to other wrapped refs using c. */ |
| +  public static Comparable getComparableBytesRef(BytesRef b, Comparator c) { |
| +    return new ComparableBytesRef(b, c); |
| +  } |
| +} |
| Index: src/java/org/apache/lucene/util/packed/Packed64.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/Packed64.java Fri Feb 12 01:39:36 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/Packed64.java Fri Feb 12 01:39:36 CET 2010 |
| @@ -0,0 +1,210 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Space optimized random access capable array of values with a fixed number of |
| + * bits. For 32 bits/value and less, performance on 32 bit machines is not |
| + * optimal. Consider using {@link Packed32} for such a setup. |
| + * <p> |
| + * The implementation strives to avoid conditionals and expensive operations, |
| + * sacrificing code clarity to achieve better performance. |
| + */ |
| +public class Packed64 extends PackedInts.ReaderImpl |
| +  implements PackedInts.Mutable { |
| +  static final int BLOCK_SIZE = 64; // 32 = int, 64 = long |
| +  static final int BLOCK_BITS = 6; // The #bits representing BLOCK_SIZE |
| +  static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE |
| + |
| +  private static final int ENTRY_SIZE = BLOCK_SIZE + 1; |
| +  // Number of table entries per bit position: SHIFTS and WRITE_MASKS are |
| +  // indexed with bitPos * FAC_BITPOS + {0, 1, 2} |
| +  private static final int FAC_BITPOS = 3; |
| + |
| +  /* |
| +   * In order to make an efficient value-getter, conditionals should be |
| +   * avoided. A value can be positioned inside of a block, requiring shifting |
| +   * left or right or it can span two blocks, requiring a left-shift on the |
| +   * first block and a right-shift on the right block. |
| +   * </p><p> |
| +   * By always shifting the first block both left and right, we get exactly |
| +   * the right bits. By always shifting the second block right and applying |
| +   * a mask, we get the right bits there. After that, we | the two bitsets. |
| +   */ |
| +  private static final int[][] SHIFTS = |
| +    new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; |
| +  //new int[BLOCK_SIZE+1][BLOCK_SIZE][BLOCK_SIZE+1]; |
| +  private static final long[][] MASKS = new long[ENTRY_SIZE][ENTRY_SIZE]; |
| + |
| +  static { // Generate shifts |
| +    for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| +      for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| +        int[] currentShifts = SHIFTS[elementBits]; |
| +        int base = bitPos * FAC_BITPOS; |
| +        currentShifts[base    ] = bitPos; |
| +        currentShifts[base + 1] = BLOCK_SIZE - elementBits; |
| +        if (bitPos <= BLOCK_SIZE - elementBits) { // Single block |
| +          currentShifts[base + 2] = 0; |
| +          MASKS[elementBits][bitPos] = 0; |
| +        } else { // Two blocks |
| +          int rBits = elementBits - (BLOCK_SIZE - bitPos); |
| +          currentShifts[base + 2] = BLOCK_SIZE - rBits; |
| +          MASKS[elementBits][bitPos] = ~(~0L << rBits); |
| +        } |
| +      } |
| +    } |
| +  } |
| + |
| +  /* |
| +   * The setter requires more masking than the getter. |
| +   * For every bit position there are three entries: |
| +   * base:   and-mask clearing the bits of the value in the first block. |
| +   * base+1: and-mask clearing the bits of the value in the second block. |
| +   * base+2: and-mask applied to the part of the value that is or'ed into |
| +   *         the second block. |
| +   */ |
| +  private static final long[][] WRITE_MASKS = |
| +    new long[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; |
| +  static { |
| +    for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| +      long elementPosMask = ~(~0L << elementBits); |
| +      int[] currentShifts = SHIFTS[elementBits]; |
| +      long[] currentMasks = WRITE_MASKS[elementBits]; |
| +      for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| +        int base = bitPos * FAC_BITPOS; |
| +        currentMasks[base  ] =~((elementPosMask |
| +                                 << currentShifts[base + 1]) |
| +                                >>> currentShifts[base]); |
| +        if (bitPos <= BLOCK_SIZE - elementBits) { // Second block not used |
| +          // The value lies completely inside the first block, so the second |
| +          // block must be left untouched. Deriving the mask from the shift |
| +          // (which is 0 here) would clear the lowest elementBits bits of the |
| +          // following block on every set. |
| +          currentMasks[base+1] = ~0L; // Keep all bits |
| +          currentMasks[base+2] = 0L;  // Or with 0 |
| +        } else { // Two blocks |
| +          currentMasks[base+1] = ~(elementPosMask |
| +                                   << currentShifts[base + 2]); |
| +          currentMasks[base+2] = ~0L; |
| +        } |
| +      } |
| +    } |
| +  } |
| + |
| +  /* The bits */ |
| +  private long[] blocks; |
| + |
| +  // Cached calculations |
| +  private int maxPos;      // blocks.length * BLOCK_SIZE / bitsPerValue - 2 |
| +  private int[] shifts;    // The shifts for the current bitsPerValue |
| +  private long[] readMasks; |
| +  private long[] writeMasks; |
| + |
| +  /** |
| +   * Creates an array with the internal structures adjusted for the given |
| +   * limits and initialized to 0. |
| +   * @param valueCount the number of elements. |
| +   * @param bitsPerValue the number of bits available for any given value. |
| +   */ |
| +  public Packed64(int valueCount, int bitsPerValue) { |
| +    // TODO: Test for edge-cases (2^31 values, 63 bitsPerValue) |
| +    // +2 due to the avoid-conditionals-trick. The last entry is always 0 |
| +    this(new long[(int)((long)valueCount * bitsPerValue / BLOCK_SIZE + 2)], |
| +         valueCount, bitsPerValue); |
| +  } |
| + |
| + |
| +  /** |
| +   * Creates an array backed by the given blocks. |
| +   * <p> |
| +   * Note: The blocks are used directly, so changes to the given block will |
| +   * affect the Packed64-structure. |
| +   * @param blocks used as the internal backing array. Note that the last |
| +   *        element cannot be addressed directly. |
| +   * @param valueCount the number of values. |
| +   * @param bitsPerValue the number of bits available for any given value. |
| +   */ |
| +  public Packed64(long[] blocks, int valueCount, int bitsPerValue) { |
| +    super(valueCount, bitsPerValue); |
| +    this.blocks = blocks; |
| +    updateCached(); |
| +  } |
| + |
| +  /** |
| +   * Creates an array with content retrieved from the given IndexInput. |
| +   * @param in an IndexInput, positioned at the start of Packed64-content. |
| +   * @param valueCount the number of elements. |
| +   * @param bitsPerValue the number of bits available for any given value. |
| +   * @throws java.io.IOException if the values for the backing array could not |
| +   *         be retrieved. |
| +   */ |
| +  // NOTE(review): PackedWriter.finish() appends one extra dummy long after |
| +  // the packed longs; it is not consumed here -- confirm that the caller |
| +  // does not rely on the position of the input afterwards. |
| +  public Packed64(IndexInput in, int valueCount, int bitsPerValue) |
| +    throws IOException { |
| +    super(valueCount, bitsPerValue); |
| +    int size = size(bitsPerValue, valueCount); |
| +    blocks = new long[size+1]; // +1 due to non-conditional tricks |
| +    for(int i=0;i<size;i++) { |
| +      blocks[i] = in.readLong(); |
| +    } |
| +    updateCached(); |
| +  } |
| + |
| +  // Number of longs needed to hold valueCount values of bitsPerValue bits |
| +  private static int size(int bitsPerValue, int valueCount) { |
| +    final long totBitCount = (long) valueCount * bitsPerValue; |
| +    return (int) (totBitCount/64 + ((totBitCount % 64 == 0 ) ? 0:1)); |
| +  } |
| + |
| +  // Selects the precomputed tables that belong to the current bitsPerValue |
| +  private void updateCached() { |
| +    readMasks = MASKS[bitsPerValue]; |
| +    shifts = SHIFTS[bitsPerValue]; |
| +    writeMasks = WRITE_MASKS[bitsPerValue]; |
| +    maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2); |
| +  } |
| + |
| +  /** |
| +   * @param index the position of the value. |
| +   * @return the value at the given index. |
| +   */ |
| +  public long get(final int index) { |
| +    // Multiply as long: index * bitsPerValue overflows an int for big arrays |
| +    final long majorBitPos = (long) index * bitsPerValue; |
| +    final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE |
| +    final int bitPos =     (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); |
| + |
| +    final int base = bitPos * FAC_BITPOS; |
| + |
| +    // The second block is always read; its contribution is masked to 0 when |
| +    // the value lies inside the first block. |
| +    return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) | |
| +           ((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]); |
| +  } |
| + |
| +  /** |
| +   * Stores the value at the given index. |
| +   * @param index the position of the value. |
| +   * @param value the value to store. Bits above bitsPerValue are not |
| +   *        checked here. |
| +   */ |
| +  public void set(final int index, final long value) { |
| +    // Multiply as long: index * bitsPerValue overflows an int for big arrays |
| +    final long majorBitPos = (long) index * bitsPerValue; |
| +    final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE |
| +    final int bitPos =     (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); |
| +    final int base = bitPos * FAC_BITPOS; |
| + |
| +    blocks[elementPos  ] = (blocks[elementPos  ] & writeMasks[base]) |
| +                           | (value << shifts[base + 1] >>> shifts[base]); |
| +    blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1]) |
| +                           | ((value << shifts[base + 2]) & writeMasks[base+2]); |
| +  } |
| + |
| +  public String toString() { |
| +    return "Packed64(bitsPerValue=" + bitsPerValue + ", size=" |
| +           + size() + ", maxPos=" + maxPos |
| +           + ", elements.length=" + blocks.length + ")"; |
| +  } |
| + |
| +  /** |
| +   * @return the array header size plus the size of the backing longs. |
| +   */ |
| +  public long ramBytesUsed() { |
| +    // Multiply as long so that huge backing arrays do not overflow an int |
| +    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER |
| +           + (long) blocks.length * RamUsageEstimator.NUM_BYTES_LONG; |
| +  } |
| + |
| +  /** Sets all bits of the backing array to 0. */ |
| +  public void clear() { |
| +    Arrays.fill(blocks, 0L); |
| +  } |
| +} |
| \ No newline at end of file |
| Index: src/test/org/apache/lucene/util/packed/TestPackedInts.java |
| =================================================================== |
| --- src/test/org/apache/lucene/util/packed/TestPackedInts.java Fri Feb 12 03:16:55 CET 2010 |
| +++ src/test/org/apache/lucene/util/packed/TestPackedInts.java Fri Feb 12 03:16:55 CET 2010 |
| @@ -0,0 +1,280 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.store.*; |
| +import org.apache.lucene.util.LuceneTestCase; |
| + |
| +import java.io.StringWriter; |
| +import java.util.ArrayList; |
| +import java.util.List; |
| +import java.util.Random; |
| +import java.io.IOException; |
| + |
| +/** |
| + * Tests for the writers, readers and mutable implementations of the |
| + * packed package. |
| + */ |
| +public class TestPackedInts extends LuceneTestCase { |
| + |
| +/*  public void testBitsRequired() throws Exception { |
| +    assertEquals(61, PackedInts.bitsRequired((long)Math.pow(2, 61)-1)); |
| +    assertEquals(61, PackedInts.bitsRequired(0x1FFFFFFFFFFFFFFFL)); |
| +    assertEquals(62, PackedInts.bitsRequired(0x3FFFFFFFFFFFFFFFL)); |
| +    assertEquals(63, PackedInts.bitsRequired(0x7FFFFFFFFFFFFFFFL)); |
| +  } */ |
| + |
| +  /** Checks PackedInts.maxValue for a few bit counts, including 63 and 64. */ |
| +  public void testMaxValues() throws Exception { |
| +    assertEquals("1 bit -> max == 1", |
| +                 1, PackedInts.maxValue(1)); |
| +    assertEquals("2 bit -> max == 3", |
| +                 3, PackedInts.maxValue(2)); |
| +    assertEquals("8 bit -> max == 255", |
| +                 255, PackedInts.maxValue(8)); |
| +    assertEquals("63 bit -> max == Long.MAX_VALUE", |
| +                 Long.MAX_VALUE, PackedInts.maxValue(63)); |
| +    // Must ask for 64 bits here; asking for 63 again left this case untested |
| +    assertEquals("64 bit -> max == Long.MAX_VALUE (same as for 63 bit)", |
| +                 Long.MAX_VALUE, PackedInts.maxValue(64)); |
| +  } |
| + |
| +  /** |
| +   * Writes random values with 1 to 62 bits per value and verifies that a |
| +   * reader returns the same values. |
| +   */ |
| +  public void testPackedInts() throws IOException { |
| +    Random rand = newRandom(); |
| +    for(int iter=0;iter<50;iter++) { |
| +      long ceil = 2; |
| +      // nocommit -- need to get the 64 bit case working |
| +      for(int nbits=1;nbits<63;nbits++) { |
| +        final int valueCount = 100+rand.nextInt(500); |
| +        final Directory d = new MockRAMDirectory(); |
| + |
| +        IndexOutput out = d.createOutput("out.bin"); |
| +        PackedInts.Writer w = PackedInts.getWriter( |
| +          out, valueCount, nbits, |
| +          PackedInts.PRIORITY.packed, PackedInts.BLOCK_PREFERENCE.bit64); |
| + |
| +        final long[] values = new long[valueCount]; |
| +        for(int i=0;i<valueCount;i++) { |
| +          // ceil == 2^nbits, so the value fits in nbits bits |
| +          long v = rand.nextLong() % ceil; |
| +          if (v < 0) { |
| +            v = -v; |
| +          } |
| +          values[i] = v; |
| +          w.add(values[i]); |
| +        } |
| +        w.finish(); |
| +        out.close(); |
| + |
| +        IndexInput in = d.openInput("out.bin"); |
| +        PackedInts.Reader r = PackedInts.getReader(in); |
| +        for(int i=0;i<valueCount;i++) { |
| +          assertEquals("index=" + i + " ceil=" + ceil + " valueCount=" |
| +                       + valueCount + " nbits=" + nbits + " for " |
| +                       + r.getClass().getSimpleName(), values[i], r.get(i)); |
| +        } |
| +        in.close(); |
| +        ceil *= 2; |
| +      } |
| +    } |
| +  } |
| + |
| +  /** Writes three 1 bit values and reads back the first two. */ |
| +  public void testAligned64Writer() throws IOException { |
| +    final Directory d = new MockRAMDirectory(); |
| + |
| +    long[] INPUT = new long[]{1, 0, 1}; |
| +    IndexOutput out = d.createOutput("out.bin"); |
| +    PackedInts.Writer w = PackedInts.getWriter( |
| +      out, INPUT.length, 1, |
| +      PackedInts.PRIORITY.packed, PackedInts.BLOCK_PREFERENCE.bit64); |
| +    for (long input: INPUT) { |
| +      w.add(input); |
| +    } |
| +    w.finish(); |
| +    out.close(); |
| + |
| +    IndexInput in = d.openInput("out.bin"); |
| +    PackedInts.Reader r = PackedInts.getReader(in); |
| +    assertEquals("The first stored bit should be retrievable", 1, r.get(0)); |
| +    assertEquals("The second stored bit should be retrievable", 0, r.get(1)); |
| +    in.close(); |
| +  } |
| + |
| +  /** |
| +   * Fills all implementations for 8 bits per value with the values 1..255 |
| +   * and verifies that they agree. |
| +   */ |
| +  public void testControlledEquality() { |
| +    final int VALUE_COUNT = 255; |
| +    final int BITS_PER_VALUE = 8; |
| + |
| +    List<PackedInts.Mutable> packedInts = |
| +      createPackedInts(VALUE_COUNT, BITS_PER_VALUE); |
| +    for (PackedInts.Mutable packedInt: packedInts) { |
| +      for (int i = 0 ; i < packedInt.size() ; i++) { |
| +        packedInt.set(i, i+1); |
| +      } |
| +    } |
| +    assertListEquality(packedInts); |
| +  } |
| + |
| +  /** |
| +   * Runs assertRandomEquality for several value counts and for 1 to 64 bits |
| +   * per value. |
| +   */ |
| +  public void testRandomEquality() { |
| +    final int[] VALUE_COUNTS = new int[]{1, 5, 8, 100}; |
| +    final int MIN_BITS_PER_VALUE = 1; |
| +    final int MAX_BITS_PER_VALUE = 64; |
| +    final int RANDOM_SEED = 87; |
| + |
| +    for (int valueCount: VALUE_COUNTS) { |
| +      for (int bitsPerValue = MIN_BITS_PER_VALUE ; |
| +           bitsPerValue <= MAX_BITS_PER_VALUE ; |
| +           bitsPerValue++) { |
| +        assertRandomEquality(valueCount, bitsPerValue, RANDOM_SEED); |
| +      } |
| +    } |
| +  } |
| + |
| +  /* ************************************************************************ */ |
| + |
| +  // This should be disabled when merging into Lucene |
| +  /** |
| +   * Prints a table with the time used for random gets, one column for each |
| +   * implementation. Nothing is asserted. |
| +   */ |
| +  public void testSpeed() { |
| +    final int RUN_COUNT = 3; |
| +    final int SEED = 87; |
| +    final int[] VALUE_COUNTS = new int[]{ |
| +      1000, 1000*1000, 10*1000*1000}; |
| +    final int[] BITS_PER_VALUE = new int[]{ |
| +      1, 3, 4, 7, 8, 9, 15, 16, 17, 28, 31, 32, 33, 47, 49, 63}; |
| +    final int[] GET_COUNT = new int[]{10*1000*1000}; |
| +    String BASE_HEADER = String.format("%20s%20s%20s", |
| +                                       "bitsPerValue", "valueCount", "getCount"); |
| + |
| +    String oldHeader = null; |
| + |
| +    for (int bitsPerValue: BITS_PER_VALUE) { |
| +      for (int valueCount: VALUE_COUNTS) { |
| +        for (int getCount: GET_COUNT) { |
| +          List<PackedInts.Mutable> packedInts = |
| +            createPackedInts(valueCount, bitsPerValue); |
| +          String header = BASE_HEADER; |
| +          for (PackedInts.Mutable packedInt: packedInts) { |
| +            header += String.format( |
| +              "%20s", packedInt.getClass().getSimpleName()); |
| +          } |
| +          // Only print the header when the set of implementations changes |
| +          if (!header.equals(oldHeader)) { |
| +            System.out.println("\n" + header); |
| +            oldHeader = header; |
| +          } |
| +          measureSpeed( |
| +            packedInts, valueCount, bitsPerValue, getCount, |
| +            RUN_COUNT, SEED); |
| +        } |
| +      } |
| +    } |
| +  } |
| + |
| +  // Prints one table row: for every implementation the fastest of runCount |
| +  // runs of getCount random gets, in milliseconds. |
| +  private void measureSpeed( |
| +    List<? extends PackedInts.Reader> packedInts, |
| +    int valueCount, int bitsPerValue, |
| +    int getCount, int runCount, int seed) { |
| +    StringWriter sw = new StringWriter(1000); |
| +    sw.append(String.format("%20d%20d%20s", |
| +                            bitsPerValue, valueCount, getCount)); |
| + |
| +    for (PackedInts.Reader packedInt: packedInts) { |
| +      long minTime = Long.MAX_VALUE; |
| +      for (int run = 0 ; run < runCount ; run++) { |
| +        // Same seed for every run, so all runs request the same indexes |
| +        Random random = new Random(seed); |
| +        long startTime = System.nanoTime(); |
| +        for (int get = 0 ; get < getCount ; get++) { |
| +          packedInt.get(random.nextInt(valueCount)); |
| +        } |
| +        minTime = Math.min(minTime, System.nanoTime() - startTime); |
| +      } |
| +      sw.append(String.format("%20d", minTime / 1000000)); |
| +    } |
| +    System.out.println(sw.toString()); |
| +  } |
| + |
| +  /* ************************************************************************ */ |
| + |
| +  /** |
| +   * Fills every implementation that supports bitsPerValue with the same |
| +   * pseudo random values and verifies that all of them hold equal values. |
| +   */ |
| +  public void assertRandomEquality( |
| +    int valueCount, int bitsPerValue, int randomSeed) { |
| +    List<PackedInts.Mutable> packedInts = |
| +      createPackedInts(valueCount, bitsPerValue); |
| +    for (PackedInts.Mutable packedInt: packedInts) { |
| +      try { |
| +        fill(packedInt, (long)(Math.pow(2, bitsPerValue)-1), randomSeed); |
| +      } catch (Exception e) { |
| +        e.printStackTrace(System.err); |
| +        fail(String.format( |
| +          "Exception while filling %s: valueCount=%d, bitsPerValue=%s", |
| +          packedInt.getClass().getSimpleName(), |
| +          valueCount, bitsPerValue)); |
| +      } |
| +    } |
| +    assertListEquality(packedInts); |
| +  } |
| + |
| +  // Creates one instance of every implementation that is used for the |
| +  // given number of bits per value. |
| +  private List<PackedInts.Mutable> createPackedInts( |
| +    int valueCount, int bitsPerValue) { |
| +    List<PackedInts.Mutable> packedInts = new ArrayList<PackedInts.Mutable>(); |
| +    if (bitsPerValue <= 8) { |
| +      packedInts.add(new PackedDirectByte(valueCount)); |
| +    } |
| +    if (bitsPerValue <= 16) { |
| +      packedInts.add(new PackedDirectShort(valueCount)); |
| +    } |
| +    if (bitsPerValue <= 31) { |
| +      packedInts.add(new Packed32(valueCount, bitsPerValue)); |
| +      packedInts.add(new PackedAligned32(valueCount, bitsPerValue)); |
| +    } |
| +    if (bitsPerValue <= 32) { |
| +      packedInts.add(new PackedDirectInt(valueCount)); |
| +    } |
| +    if (bitsPerValue <= 63) { |
| +      packedInts.add(new Packed64(valueCount, bitsPerValue)); |
| +      packedInts.add(new PackedAligned64(valueCount, bitsPerValue)); |
| +    } |
| +    packedInts.add(new PackedDirectLong(valueCount)); |
| +    return packedInts; |
| +  } |
| + |
| +  // Sets every position to a pseudo random value between 0 and maxValue and |
| +  // checks that an immediate get returns the value that was set. |
| +  private void fill( |
| +    PackedInts.Mutable packedInt, long maxValue, int randomSeed) { |
| +    maxValue++; |
| +    Random random = new Random(randomSeed); |
| +    for (int i = 0 ; i < packedInt.size() ; i++) { |
| +      long value = Math.abs(random.nextLong() % maxValue); |
| +      packedInt.set(i, value); |
| +      assertEquals(String.format( |
| +        "The set/get of the value at index %d should match for %s", |
| +        i, packedInt.getClass().getSimpleName()), |
| +                   value, packedInt.get(i)); |
| +    } |
| +  } |
| + |
| +  // Verifies that all readers have the same size and return the same value |
| +  // as the first reader at every index. |
| +  private void assertListEquality( |
| +    List<? extends PackedInts.Reader> packedInts) { |
| +    if (packedInts.size() == 0) { |
| +      return; |
| +    } |
| +    PackedInts.Reader base = packedInts.get(0); |
| +    int valueCount = base.size(); |
| +    for (PackedInts.Reader packedInt: packedInts) { |
| +      assertEquals("The number of values should be the same ", |
| +                   valueCount, packedInt.size()); |
| +    } |
| +    for (int i = 0 ; i < valueCount ; i++) { |
| +      for (int j = 1 ; j < packedInts.size() ; j++) { |
| +        assertEquals(String.format( |
| +          "The value at index %d should be the same for %s and %s", |
| +          i, base.getClass().getSimpleName(), |
| +          packedInts.get(j).getClass().getSimpleName()), |
| +                     base.get(i), packedInts.get(j).get(i)); |
| +      } |
| +    } |
| +  } |
| +} |
| Index: src/java/org/apache/lucene/util/packed/PackedAligned64.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedAligned64.java Fri Feb 12 02:47:26 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedAligned64.java Fri Feb 12 02:47:26 CET 2010 |
| @@ -0,0 +1,175 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Medium space and speed trade off. No value crosses a block boundary. |
| + * <p> |
| + * The implementation strives to avoid conditionals and expensive operations, |
| + * sacrificing code clarity to achieve better performance. |
| + * <p> |
| + * Space is optimally used within the boundaries of alignment, e.g. |
| + * 7 bits/value fits 9 values/block for 64 bit. |
| + * Bits are packed left-aligned to be bit pattern compatible with other bit |
| + * array implementations where possible. |
| + */ |
| +public class PackedAligned64 extends PackedInts.ReaderImpl |
| +  implements PackedInts.Mutable { |
| +  static final int BLOCK_SIZE = 64; // 32 = int, 64 = long |
| + |
| +  private static final int ENTRY_SIZE = BLOCK_SIZE + 1; |
| + |
| +  /* |
| +   * A value is always positioned inside a single block, requiring a |
| +   * shift right to position the bits and a mask to extract them. |
| +   */ |
| +  private static final int[][] SHIFTS = new int[ENTRY_SIZE][ENTRY_SIZE]; |
| +  private static final long[] READ_MASKS = new long[ENTRY_SIZE]; |
| + |
| +  // NOTE(review): the shift grows with bitPos and exceeds 63 for |
| +  // bitPos > elementBits; Java only uses the low 6 bits of the distance, so |
| +  // the second value of a block ends up in the lowest bits. get and set agree |
| +  // with each other, but this does not look like the left-aligned layout the |
| +  // class comment describes (BLOCK_SIZE - bitPos - elementBits) -- confirm |
| +  // against the code that persists aligned blocks. |
| +  static { // Generate shifts |
| +    for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| +      int[] currentShifts = SHIFTS[elementBits]; |
| +      // The read mask only depends on elementBits, so set it once here |
| +      READ_MASKS[elementBits] = ~(~0L << elementBits); |
| +      for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| +        currentShifts[bitPos] = BLOCK_SIZE + bitPos - elementBits; |
| +      } |
| +    } |
| +  } |
| + |
| +  /* |
| +   * Setting a value requires clearing the destination bits with a mask, then |
| +   * shifting the value to the left and or'ing the two numbers. |
| +   */ |
| +  private static final long[][] WRITE_MASKS = |
| +    new long[ENTRY_SIZE][ENTRY_SIZE]; |
| +  static { |
| +    for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| +      long elementPosMask = ~(~0L << elementBits); |
| +      int[] currentShifts = SHIFTS[elementBits]; |
| +      long[] currentMasks = WRITE_MASKS[elementBits]; |
| +      for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| +        currentMasks[bitPos] = ~(elementPosMask |
| +                                 << currentShifts[bitPos]); |
| +      } |
| +    } |
| +  } |
| + |
| +  /* The bits */ |
| +  private long[] blocks; |
| + |
| +  /* Cached values */ |
| +  private int valuesPerBlock; |
| +  private int[] shifts; |
| +  private long readMask; |
| +  private long[] writeMasks; |
| + |
| +  /** |
| +   * Creates an array with the internal structures adjusted for the given |
| +   * limits and initialized to 0. |
| +   * @param valueCount the number of values. |
| +   * @param bitsPerValue the number of bits available for any given value. |
| +   */ |
| +  public PackedAligned64(int valueCount, int bitsPerValue) { |
| +    super(valueCount, bitsPerValue); |
| +    blocks = new long[size(valueCount, bitsPerValue)]; |
| +    updateCached(); |
| +  } |
| + |
| +  // Number of longs in the backing array: the completely filled blocks |
| +  // plus 2 |
| +  private static int size(int valueCount, int bitsPerValue) { |
| +    int valuesPerBlock = BLOCK_SIZE / bitsPerValue; |
| +    return valueCount / valuesPerBlock + 2; |
| +  } |
| + |
| +  /** |
| +   * Creates an array with content retrieved from the given IndexInput. |
| +   * @param in an IndexInput, positioned at the start of the |
| +   *        PackedAligned64-content. |
| +   * @param valueCount the number of elements. |
| +   * @param bitsPerValue the number of bits available for any given value. |
| +   * @throws java.io.IOException if the values for the backing array could not |
| +   *         be retrieved. |
| +   */ |
| +  public PackedAligned64(IndexInput in, int valueCount, int bitsPerValue) |
| +    throws IOException { |
| +    super(valueCount, bitsPerValue); |
| +    int size = size(valueCount, bitsPerValue); |
| +    blocks = new long[size]; |
| +    for(int i=0;i<size;i++) { |
| +      blocks[i] = in.readLong(); |
| +    } |
| +    updateCached(); |
| +  } |
| + |
| + |
| +  /** |
| +   * Creates an array backed by the given blocks. |
| +   * <p> |
| +   * Note: The blocks are used directly, so changes to the given block will |
| +   * affect the PackedAligned64-structure. |
| +   * @param blocks used as the internal backing array. |
| +   * @param valueCount the number of values. |
| +   * @param bitsPerValue the number of bits available for any given value. |
| +   */ |
| +  public PackedAligned64(long[] blocks, int valueCount, int bitsPerValue) { |
| +    // TODO: Check that blocks.length is sufficient for holding length values |
| +    super(valueCount, bitsPerValue); |
| +    this.blocks = blocks; |
| +    updateCached(); |
| +  } |
| + |
| +  // Selects the precomputed tables that belong to the current bitsPerValue |
| +  private void updateCached() { |
| +    valuesPerBlock = BLOCK_SIZE / bitsPerValue; |
| +    shifts = SHIFTS[bitsPerValue]; |
| +    readMask = READ_MASKS[bitsPerValue]; |
| +    writeMasks = WRITE_MASKS[bitsPerValue]; |
| +  } |
| + |
| +  /** |
| +   * @param index the position of the value. |
| +   * @return the value at the given index. |
| +   */ |
| +  public long get(final int index) { |
| +    final int blockPos = index / valuesPerBlock; |
| +    // Bit offset of the value inside its block: (index % valuesPerBlock) |
| +    // values of bitsPerValue bits each |
| +    final int bitPos = (index - (blockPos * valuesPerBlock)) * bitsPerValue; |
| + |
| +    return (blocks[blockPos] >>> shifts[bitPos]) & readMask; |
| +  } |
| + |
| +  /** |
| +   * Stores the value at the given index. |
| +   * @param index the position of the value. |
| +   * @param value the value to store. Bits above bitsPerValue are not masked |
| +   *        away here. |
| +   */ |
| +  public void set(final int index, final long value) { |
| +    final int blockPos = index / valuesPerBlock; |
| +    final int bitPos = (index - (blockPos * valuesPerBlock)) * bitsPerValue; |
| + |
| +    blocks[blockPos] = (blocks[blockPos] & writeMasks[bitPos]) |
| +                       | (value << shifts[bitPos]); |
| +  } |
| + |
| +  /** Sets all bits of the backing array to 0. */ |
| +  public void clear() { |
| +    Arrays.fill(blocks, 0); |
| +  } |
| + |
| +  /** |
| +   * @return the array header size plus the size of the backing longs. |
| +   */ |
| +  public long ramBytesUsed() { |
| +    // The backing array holds longs, not ints; multiply as long so that |
| +    // huge arrays do not overflow an int |
| +    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER |
| +           + (long) blocks.length * RamUsageEstimator.NUM_BYTES_LONG; |
| +  } |
| + |
| +} |
| \ No newline at end of file |