| Index: src/java/org/apache/lucene/util/packed/PackedInts.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedInts.java Fri Feb 26 13:16:02 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedInts.java Fri Feb 26 13:16:02 CET 2010 |
| @@ -0,0 +1,443 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +// nocommit -- rename to UnsignedPackedInts? or pull |
| +// minValue down |
| + |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.CodecUtil; |
| +import org.apache.lucene.util.Constants; |
| +import org.apache.lucene.util.ConsumesRAM; |
| + |
| +import java.io.IOException; |
| + |
| +/** |
| + * Simplistic compression for array of long values, where |
| + * each value is >= 0 and <= a specified maximum value. The |
| + * values are stored as packed ints, with each value |
| + * consuming a fixed number of bits. |
| + * |
| + * <p>NOTE: this class is meant only to be used internally |
| + * by Lucene; it's only public so it can be shared across |
| + * packages. This means the API is freely subject to |
| + * change, and, the class could be removed entirely, in any |
| + * Lucene release. Use directly at your own risk! |
| + */ |
| + |
| +// nocommit |
| +// - do we need int/long variants (for perf)? or long |
| +// only suffices? |
| +// - what native type is best perf? long/int/short/byte? |
| + |
| +public class PackedInts { |
| + |
| + private final static String CODEC_NAME = "PackedInts"; |
| + private final static int VERSION_START = 0; |
| + private final static int VERSION_CURRENT = 0; |
| + |
| + /** |
| + * The priority for selecting the Reader and Writer implementation. |
| + * </p><p> |
| + * packed: Pack the bits right after each other.<br /> |
| + * aligned32: Pack bits so that no values cross 32 bit block boundaries.<br /> |
| + * aligned64: Pack bits so that no values cross 64 bit block boundaries.<br /> |
| + * auto: Guesstimate the best implementation. |
| + * </p><p> |
| + * Note: When a more efficient structure (in terms of memory as well as speed) |
| + * can be substituted without penalty, this will be done. Example: |
| + * Asking for packed with 3 bits/value will return packed32 or packed64, while |
| + * asking for packed with 4 bits/value will return aligned32 or aligned64. |
| + * Asking for aligned with 7 bits/value and block preferences bit32 will |
| + * return direct8, as the amount of space used by an aligned32 with 7 |
| + * bits/value is the same as direct8, while direct8 is less processor- |
| + * intensive. |
| + * </p><p> |
| + * Note: 63 bits/value will always be mapped to a direct64, due to the |
| + * problem of stating maxValues > 2^63-1. |
| + * </p><p> |
| + * Note: auto will never select an aligned64 structure as this has low |
| + * performance on 32 bit machines. |
| + */ |
| + public enum STORAGE {packed, aligned32, aligned64, auto} |
| + |
| + /** |
| + * The size for the underlying blocks for packed or aligned structures. |
| + * Using 64bit blocks (longs) on a 32bit machine is slower than using 32bit |
| + * blocks (ints). |
| + */ |
| + enum BLOCK {bit32(32), bit64(64); |
| + private int bits; |
| + BLOCK(int bits) { |
| + this.bits = bits; |
| + } |
| + |
| + public int getBits() { |
| + return bits; |
| + } |
| + |
| + public static BLOCK getSystemDefault() { |
| + return Constants.JRE_IS_64BIT ? bit64 : bit32; |
| + } |
| + } |
| + |
| + /** |
| + * The specific implementation derived from bits/value, STORAGE and BLOCK. |
| + */ |
| + enum IMPLEMENTATION {packed32, packed64, aligned32, aligned64, |
| + direct8, direct16, direct32, direct64 |
| + } |
| + |
| + /** |
| + * The persistence format used when writing and reading. |
| + * @see STORAGE |
| + */ |
| + enum PERSISTENCE {packed, aligned32, aligned64} |
| + |
| + /** |
| + * A read-only random access array of positive integers. |
| + * @lucene.internal |
| + */ |
| + public static interface Reader extends ConsumesRAM { |
| + /** |
| + * @param index the position of the wanted value. |
| + * @return the value at the stated index. |
| + */ |
| + long get(int index); |
| + |
| + /** |
| + * @return the number of bits used to store any given value. |
| + * Note: This does not imply that memory usage is |
| + * {@code bitsPerValue * #values} as implementations are free to |
| + * use non-space-optimal packing of bits. |
| + */ |
| + int getBitsPerValue(); |
| + |
| + /** |
| + * @return the number of values. |
| + */ |
| + int size(); |
| + } |
| + |
| + /** |
| + * A packed integer array that can be modified. |
| + * @lucene.internal |
| + */ |
| + public static interface Mutable extends Reader { |
| + /** |
| + * Set the value at the given index in the array. |
| + * @param index where the value should be positioned. |
| + * @param value a value conforming to the constraints set by the array. |
| + */ |
| + void set(int index, long value); |
| + |
| + /** |
| + * Sets all values to 0. |
| + */ |
| + |
| + void clear(); |
| + } |
| + |
| + /** |
| + * A simple base for Readers that keeps track of valueCount and bitsPerValue. |
| + * @lucene.internal |
| + */ |
| + public static abstract class ReaderImpl implements Reader { |
| + protected final int bitsPerValue; |
| + protected final int valueCount; |
| + |
| + protected ReaderImpl(int valueCount, int bitsPerValue) { |
| + this.bitsPerValue = bitsPerValue; |
| + this.valueCount = valueCount; |
| + } |
| + |
| + public int getBitsPerValue() { |
| + return bitsPerValue; |
| + } |
| + |
| + public int size() { |
| + return valueCount; |
| + } |
| + |
| + public long getMaxValue() { // Convenience method |
| + return maxValue(bitsPerValue); |
| + } |
| + } |
| + |
| + /** A write-once Writer. |
| + * @lucene.internal |
| + */ |
| + public static abstract class Writer { |
| + protected final IndexOutput out; |
| + protected final int bitsPerValue; |
| + protected final int valueCount; |
| + |
| + protected Writer(IndexOutput out, int valueCount, int bitsPerValue, |
| + PERSISTENCE persistence) throws IOException { |
| + assert bitsPerValue <= 64; |
| + |
| + this.out = out; |
| + this.valueCount = valueCount; |
| + this.bitsPerValue = bitsPerValue; |
| + // Stream header: codec header, persistence name, bits/value, value count (getReader reads the same order). |
| + CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); // stamp the version actually written |
| + out.writeString(persistence.toString()); |
| + out.writeVInt(bitsPerValue); |
| + out.writeVInt(valueCount); |
| + } |
| + |
| + public abstract void add(long v) throws IOException; |
| + public abstract void finish() throws IOException; |
| + } |
| + |
| + /** |
| + * Retrieve PackedInt data from the IndexInput and return a packed int |
| + * structure based on it. |
| + * @param in positioned at the beginning of a stored packed int structure. |
| + * @return a read only random access capable array of positive integers. |
| + * @throws IOException if the structure could not be retrieved. |
| + * @lucene.internal |
| + */ |
| + public static Reader getReader(IndexInput in) throws IOException { |
| + // Accept every format version up to the one this code knows how to write. |
| + CodecUtil.checkHeader(in, CODEC_NAME, VERSION_CURRENT); |
| + // The fields below are read in the order the Writer constructor emits them. |
| + PERSISTENCE persistence = PERSISTENCE.valueOf(in.readString()); |
| + final int bitsPerValue = in.readVInt(); |
| + final int valueCount = in.readVInt(); |
| + |
| + // Choose the in-memory structure from the stored format and the bits/value. |
| + IMPLEMENTATION implementation = |
| + getImplementation(persistence, bitsPerValue); |
| + switch (implementation) { |
| + case packed32: return new Packed32(in, valueCount, bitsPerValue); |
| + case packed64: return new Packed64(in, valueCount, bitsPerValue); |
| + case aligned32: return new Aligned32(in, valueCount, bitsPerValue); |
| + case aligned64: return new Aligned64(in, valueCount, bitsPerValue); |
| + case direct8: return new Direct8(in, valueCount); |
| + case direct16: return new Direct16(in, valueCount); |
| + case direct32: return new Direct32(in, valueCount); |
| + case direct64: return new Direct64(in, valueCount); |
| + default: throw new UnsupportedOperationException("Not implemented yet"); |
| + } |
| + |
| + // TODO an mmap reader as well? |
| + } |
| + |
| + /** |
| + * Create a packed integer array with the given amount of values initialized |
| + * to 0. The valueCount and the bitsPerValue cannot be changed after creation. |
| + * All Mutables known by this factory are kept fully in RAM. |
| + * @param valueCount the number of elements. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * @param storage the preferred memory-representation. |
| + * @return a mutable packed integer array. |
| + * @throws java.io.IOException if the Mutable could not be created. With the |
| + * current implementations, this never happens, but the method |
| + * signature allows for future persistence-backed Mutables. |
| + * @lucene.internal |
| + */ |
| + public static Mutable getMutable( |
| + int valueCount, int bitsPerValue, STORAGE storage) throws IOException { |
| + IMPLEMENTATION implementation = getImplementation(bitsPerValue, storage); |
| + switch (implementation) { |
| + case packed32: return new Packed32(valueCount, bitsPerValue); |
| + case packed64: return new Packed64(valueCount, bitsPerValue); |
| + case direct8: return new Direct8(valueCount); |
| + case direct16: return new Direct16(valueCount); |
| + case direct32: return new Direct32(valueCount); |
| + case direct64: return new Direct64(valueCount); |
| + case aligned32: return new Aligned32(valueCount, bitsPerValue); |
| + case aligned64: return new Aligned64(valueCount, bitsPerValue); |
| + default: throw new UnsupportedOperationException( |
| + implementation + " is not implemented yet"); |
| + } |
| + } |
| + |
| + /** |
| + * Create a packed integer array writer for the given number of values at the |
| + * given bits/value. Writers append to the given IndexOutput and have very |
| + * low memory overhead. |
| + * @param out the destination for the produced bits. |
| + * @param valueCount the number of elements. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * @param storage the preferred storage-representation. |
| + * @return a Writer ready for receiving values. |
| + * @throws IOException if bits could not be written to out. |
| + * @lucene.internal |
| + */ |
| + public static Writer getWriter( |
| + IndexOutput out, int valueCount, int bitsPerValue, |
| + STORAGE storage) throws IOException { |
| + return getWriter( |
| + out, valueCount, bitsPerValue, storage, BLOCK.getSystemDefault()); |
| + } |
| + static Writer getWriter( |
| + IndexOutput out, int valueCount, int bitsPerValue, |
| + STORAGE storage, BLOCK block) throws IOException { |
| + IMPLEMENTATION implementation = getImplementation( |
| + bitsPerValue, storage, block); |
| + switch (implementation) { |
| + case packed32: |
| + case packed64: |
| + return new PackedWriter(out, valueCount, bitsPerValue); |
| + case direct8: |
| + return new PackedWriter(out, valueCount, 8); |
| + case direct16: |
| + return new PackedWriter(out, valueCount, 16); |
| + case direct32: |
| + return new PackedWriter(out, valueCount, 32); |
| + case direct64: |
| + return new PackedWriter(out, valueCount, 64); |
| + case aligned32: |
| + return new AlignedWriter( |
| + out, valueCount, bitsPerValue, BLOCK.bit32); |
| + case aligned64: |
| + return new AlignedWriter( |
| + out, valueCount, bitsPerValue, BLOCK.bit64); |
| + default: throw new UnsupportedOperationException( |
| + implementation + " is not implemented yet"); |
| + } |
| + } |
| + |
| + /** |
| + * Derives the optimal IMPLEMENTATION based on the given preferences. Note |
| + * that the specified storage does not guarantee that the selected |
| + * implementation will be of a specific type, just that the implementation's |
| + * persistence format is compatible with storage. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * The returned IMPLEMENTATION will support values of this |
| + * size or more. |
| + * @param storage memory/speed trade-off. |
| + * @return the implementation to use. |
| + */ |
| + static IMPLEMENTATION getImplementation(int bitsPerValue, STORAGE storage) { |
| + return getImplementation(bitsPerValue, storage, BLOCK.getSystemDefault()); |
| + } |
| + static IMPLEMENTATION getImplementation( |
| + int bitsPerValue, STORAGE storage, BLOCK architecture) { |
| + |
| + switch (storage) { |
| + case aligned32: { |
| + if (bitsPerValue == 7 || bitsPerValue >= 11) { |
| + bitsPerValue = getNextFixedSize(bitsPerValue); // Align to byte, short, int or long |
| + } |
| + break; |
| + } |
| + case aligned64: { |
| + if ((bitsPerValue >= 13 && bitsPerValue <= 15) || |
| + (bitsPerValue >= 22)) { |
| + bitsPerValue = getNextFixedSize(bitsPerValue); // Align to short, int or long |
| + } |
| + } |
| + } |
| + if (storage == STORAGE.auto) { |
| + if (bitsPerValue > 58 || (bitsPerValue < 32 && bitsPerValue > 29)) { // 10% space-waste is ok |
| + bitsPerValue = getNextFixedSize(bitsPerValue); |
| + } |
| + } |
| + |
| + switch (bitsPerValue) { // The safe choices |
| + case 8: return IMPLEMENTATION.direct8; |
| + case 16: return IMPLEMENTATION.direct16; |
| + case 31: |
| + case 32: return IMPLEMENTATION.direct32; |
| + case 63: |
| + case 64: return IMPLEMENTATION.direct64; |
| + } |
| + |
| + if (bitsPerValue == 1 || bitsPerValue == 2 || bitsPerValue == 4) { |
| + return storage == STORAGE.aligned64 ? |
| + IMPLEMENTATION.aligned64 : IMPLEMENTATION.aligned32; |
| + } |
| + return bitsPerValue < 32 && architecture == BLOCK.bit32 ? |
| + IMPLEMENTATION.packed32 : IMPLEMENTATION.packed64; |
| + } |
| + |
| + /** |
| + * Derives the optimal IMPLEMENTATION based on the given preferences. |
| + * Used for selecting the correct implementation from persistent data. |
| + * @param persistence the format of the existing data. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * @return the implementation to use. |
| + */ |
| + private static IMPLEMENTATION getImplementation( |
| + PERSISTENCE persistence, int bitsPerValue) { |
| + return getImplementation( |
| + persistence, bitsPerValue, BLOCK.getSystemDefault()); |
| + } |
| + private static IMPLEMENTATION getImplementation( |
| + PERSISTENCE persistence, int bitsPerValue, BLOCK architecture) { |
| + switch (bitsPerValue) { // The safe choices |
| + case 1: |
| + case 2: |
| + case 4: return architecture == BLOCK.bit32 ? |
| + IMPLEMENTATION.aligned32 : IMPLEMENTATION.aligned64; |
| + case 8: return IMPLEMENTATION.direct8; |
| + case 16: return IMPLEMENTATION.direct16; |
| + case 31: |
| + case 32: return IMPLEMENTATION.direct32; |
| + case 63: |
| + case 64: return IMPLEMENTATION.direct64; |
| + } |
| + if (persistence == PERSISTENCE.aligned32) { |
| + return IMPLEMENTATION.aligned32; |
| + } else if (persistence == PERSISTENCE.aligned64) { |
| + return IMPLEMENTATION.aligned64; |
| + } |
| + return bitsPerValue < 32 && architecture == BLOCK.bit32 ? |
| + IMPLEMENTATION.packed32 : IMPLEMENTATION.packed64; |
| + } |
| + |
| + /** Returns how many bits are required to hold values up |
| + * to and including maxValue, using exact integer arithmetic (a |
| + * log-based computation can mis-round for large or power-of-two inputs). |
| + * @param maxValue the largest value to store; must be >= 0. |
| + * @return the number of bits needed, 0 for maxValue 0 and at most 63. |
| + * @throws IllegalArgumentException if maxValue is negative. */ |
| + public static int bitsRequired(long maxValue) { |
| + if (maxValue < 0) { |
| + throw new IllegalArgumentException("maxValue must be >= 0, got " + maxValue); |
| + } |
| + return 64 - Long.numberOfLeadingZeros(maxValue); // position of the highest set bit |
| + } |
| + |
| + /** |
| + * Computes the largest value that fits in the given number of bits. |
| + * A width of 64 is reported as Long.MAX_VALUE. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * @return the maximum value for the given bits. |
| + */ |
| + public static long maxValue(int bitsPerValue) { |
| + return bitsPerValue == 64 ? Long.MAX_VALUE : (1L << bitsPerValue) - 1; |
| + } |
| + |
| + private static int getNextFixedSize(int bits) { |
| + // Rounds the requested width up to a byte, short or int width; |
| + // anything wider than an int is given a full long. |
| + if (bits > 32) { |
| + return 64; |
| + } |
| + if (bits > 16) { |
| + return 32; |
| + } |
| + return bits > 8 ? 16 : 8; |
| + } |
| + |
| +} |
| Index: src/java/org/apache/lucene/util/CodecUtil.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/CodecUtil.java Fri Jan 22 12:58:35 CET 2010 |
| +++ src/java/org/apache/lucene/util/CodecUtil.java Fri Jan 22 12:58:35 CET 2010 |
| @@ -0,0 +1,72 @@ |
| +package org.apache.lucene.util; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| + |
| +/** |
| + * <p>NOTE: this class is meant only to be used internally |
| + * by Lucene; it's only public so it can be shared across |
| + * packages. This means the API is freely subject to |
| + * change, and, the class could be removed entirely, in any |
| + * Lucene release. Use directly at your own risk! |
| + */ |
| + |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.index.CorruptIndexException; |
| + |
| +import java.io.IOException; |
| + |
| +public final class CodecUtil { |
| + private final static int CODEC_MAGIC = 0x3fd76c17; |
| + |
| + public static void writeHeader(IndexOutput out, String codec, int version) |
| + throws IOException { |
| + final long start = out.getFilePointer(); |
| + out.writeInt(CODEC_MAGIC); |
| + out.writeString(codec); |
| + out.writeInt(version); |
| + |
| + // We require this so we can easily pre-compute header length |
| + if (out.getFilePointer()-start != codec.length()+9) { |
| + throw new IllegalArgumentException("codec must be simple ASCII, less than 128 characters in length [got " + codec + "]"); |
| + } |
| + } |
| + |
| + public static int headerLength(String codec) { |
| + return 9+codec.length(); |
| + } |
| + |
| + public static int checkHeader(IndexInput in, String codec, int maxVersion) |
| + throws IOException { |
| + final int actualHeader = in.readInt(); |
| + if (actualHeader != CODEC_MAGIC) { |
| + throw new CorruptIndexException("codec header mismatch: actual header=" + actualHeader + " vs expected header=" + CODEC_MAGIC); |
| + } |
| + final String actualCodec = in.readString(); |
| + if (!actualCodec.equals(codec)) { |
| + throw new CorruptIndexException("codec mismatch: actual codec=" + actualCodec + " vs expected codec=" + codec); |
| + } |
| + final int actualVersion = in.readInt(); |
| + if (actualVersion > maxVersion) { |
| + throw new CorruptIndexException("version " + actualVersion + " is too new (expected <= version " + maxVersion + ")"); |
| + } |
| + |
| + return actualVersion; |
| + } |
| +} |
| Index: src/java/org/apache/lucene/util/packed/Packed32.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/Packed32.java Tue Feb 23 14:58:46 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/Packed32.java Tue Feb 23 14:58:46 CET 2010 |
| @@ -0,0 +1,220 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Space optimized random access capable array of values with a fixed number of |
| + * bits. The maximum number of bits/value is 31. Use {@link Packed64} for higher |
| + * numbers. |
| + * </p><p> |
| + * The implementation strives to avoid conditionals and expensive operations, |
| + * sacrificing code clarity to achieve better performance. |
| + */ |
| +class Packed32 extends PackedInts.ReaderImpl implements PackedInts.Mutable { |
| + static final int BLOCK_SIZE = 32; // 32 = int, 64 = long |
| + static final int BLOCK_BITS = 5; // The #bits representing BLOCK_SIZE |
| + static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE |
| + |
| + private static final int ENTRY_SIZE = BLOCK_SIZE + 1; |
| + private static final int FAC_BITPOS = 3; |
| + |
| + /* |
| + * In order to make an efficient value-getter, conditionals should be |
| + * avoided. A value can be positioned inside of a block, requiring shifting |
| + * left or right or it can span two blocks, requiring a left-shift on the |
| + * first block and a right-shift on the right block. |
| + * </p><p> |
| + * By always shifting the first block both left and right, we get exactly |
| + * the right bits. By always shifting the second block right and applying |
| + * a mask, we get the right bits there. After that, we | the two bitsets. |
| + */ |
| + private static final int[][] SHIFTS = |
| + new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; |
| + private static final int[][] MASKS = new int[ENTRY_SIZE][ENTRY_SIZE]; |
| + |
| + static { // Generate shifts |
| + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| + int[] currentShifts = SHIFTS[elementBits]; |
| + int base = bitPos * FAC_BITPOS; |
| + currentShifts[base ] = bitPos; |
| + currentShifts[base + 1] = BLOCK_SIZE - elementBits; |
| + if (bitPos <= BLOCK_SIZE - elementBits) { // Single block |
| + currentShifts[base + 2] = 0; |
| + MASKS[elementBits][bitPos] = 0; |
| + } else { // Two blocks |
| + int rBits = elementBits - (BLOCK_SIZE - bitPos); |
| + currentShifts[base + 2] = BLOCK_SIZE - rBits; |
| + MASKS[elementBits][bitPos] = ~(~0 << rBits); |
| + } |
| + } |
| + } |
| + } |
| + |
| + /* |
| + * The setter requires more masking than the getter. Per bit position: [base] clears the target |
| + * bits of the first block, [base+1] those of the second block, [base+2] gates the bits or'ed into it. |
| + */ |
| + private static final int[][] WRITE_MASKS = |
| + new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; |
| + static { |
| + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| + int elementPosMask = ~(~0 << elementBits); |
| + int[] currentShifts = SHIFTS[elementBits]; |
| + int[] currentMasks = WRITE_MASKS[elementBits]; |
| + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| + int base = bitPos * FAC_BITPOS; |
| + boolean spans = bitPos > BLOCK_SIZE - elementBits; // value continues into the next block |
| + currentMasks[base ] = ~((elementPosMask << currentShifts[base + 1]) >>> currentShifts[base]); |
| + // A value held by one block must leave the next block untouched: keep all of its bits. |
| + currentMasks[base+1] = spans ? ~(elementPosMask << currentShifts[base + 2]) : ~0; |
| + currentMasks[base+2] = spans ? ~0 : 0; |
| + } |
| + } |
| + } |
| + |
| + /* The bits */ |
| + private int[] blocks; |
| + |
| + // Cached calculations, refreshed by updateCached() |
| + private int maxPos; // blocks.length * BLOCK_SIZE / bitsPerValue - 2 |
| + private int[] shifts; // The shifts for the current bitsPerValue |
| + private int[] readMasks; // MASKS row for the current bitsPerValue |
| + private int[] writeMasks; // WRITE_MASKS row for the current bitsPerValue |
| + |
| + /** |
| + * Creates an array with the internal structures adjusted for the given |
| + * limits and initialized to 0. |
| + * @param valueCount the number of elements. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * Note: bitsPerValue >31 is not supported by this implementation. |
| + */ |
| + public Packed32(int valueCount, int bitsPerValue) { |
| + this(new int[(int)(((long)valueCount) * bitsPerValue / BLOCK_SIZE + 2)], |
| + valueCount, bitsPerValue); |
| + } |
| + |
| + /** |
| + * Creates an array with content retrieved from the given IndexInput. |
| + * @param in an IndexInput, positioned at the start of Packed32-content. |
| + * @param valueCount the number of elements. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * @throws java.io.IOException if the values for the backing array could not |
| + * be retrieved. |
| + */ |
| + public Packed32(IndexInput in, int valueCount, int bitsPerValue) |
| + throws IOException { |
| + super(valueCount, bitsPerValue); |
| + int size = size(bitsPerValue, valueCount); |
| + blocks = new int[size + 1]; // +1 due to non-conditional tricks |
| + for(int i = 0 ; i < size ; i++) { |
| + blocks[i] = in.readInt(); |
| + } |
| + if (size % 2 == 1) { |
| + in.readInt(); // Align to long |
| + } |
| + updateCached(); |
| + } |
| + |
| + private static int size(int bitsPerValue, int valueCount) { |
| + final long totBitCount = (long) valueCount * bitsPerValue; |
| + return (int) (totBitCount/32 + ((totBitCount % 32 == 0 ) ? 0:1)); |
| + } |
| + |
| + |
| + /** |
| + * Creates an array backed by the given blocks. |
| + * </p><p> |
| + * Note: The blocks are used directly, so changes to the given block will |
| + * affect the Packed32-structure. |
| + * @param blocks used as the internal backing array. |
| + * @param valueCount the number of values. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * Note: bitsPerValue >31 is not supported by this implementation. |
| + */ |
| + public Packed32(int[] blocks, int valueCount, int bitsPerValue) { |
| + // TODO: Check that blocks.length is sufficient for holding length values |
| + super(valueCount, bitsPerValue); |
| + if (bitsPerValue > 31) { |
| + throw new IllegalArgumentException(String.format( |
| + "This array only supports values of 31 bits or less. The " |
| + + "required number of bits was %d. The Packed64 " |
| + + "implementation allows values with more than 31 bits", |
| + bitsPerValue)); |
| + } |
| + this.blocks = blocks; |
| + updateCached(); |
| + } |
| + |
| + private void updateCached() { |
| + readMasks = MASKS[bitsPerValue]; |
| + maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2); |
| + shifts = SHIFTS[bitsPerValue]; |
| + writeMasks = WRITE_MASKS[bitsPerValue]; |
| + } |
| + |
| + /** |
| + * @param index the position of the value. |
| + * @return the value at the given index. |
| + */ |
| + public long get(final int index) { |
| + final long majorBitPos = (long) index * bitsPerValue; // widen first: the int product can overflow |
| + final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE |
| + final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); |
| + // Each bit position owns FAC_BITPOS consecutive entries in the shift table. |
| + final int base = bitPos * FAC_BITPOS; |
| + // Branch-free read: the next block is always consulted, its mask is 0 when the value does not span. |
| + return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) | |
| + ((blocks[elementPos+1] >>> shifts[base+2]) |
| + & readMasks[bitPos]); |
| + } |
| + |
| + public void set(final int index, final long value) { |
| + final int intValue = (int)value; // only the low bitsPerValue bits end up stored |
| + final long majorBitPos = (long) index * bitsPerValue; // widen first: the int product can overflow |
| + final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE |
| + final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); |
| + final int base = bitPos * FAC_BITPOS; |
| + // Both blocks are always written so that no branch is needed; the write masks decide what changes. |
| + blocks[elementPos ] = (blocks[elementPos ] & writeMasks[base]) |
| + | (intValue << shifts[base + 1] >>> shifts[base]); |
| + blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1]) |
| + | ((intValue << shifts[base + 2]) |
| + & writeMasks[base+2]); |
| + } |
| + |
| + public void clear() { |
| + Arrays.fill(blocks, 0); |
| + } |
| + |
| + public String toString() { |
| + return "Packed32(bitsPerValue=" + bitsPerValue + ", maxPos=" + maxPos |
| + + ", elements.length=" + blocks.length + ")"; |
| + } |
| + |
| + public long ramBytesUsed() { |
| + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER // accounts for the backing array only |
| + + (long) blocks.length * RamUsageEstimator.NUM_BYTES_INT; // long math avoids int overflow |
| + } |
| +} |
| Index: src/java/org/apache/lucene/util/packed/Aligned32.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/Aligned32.java Fri Feb 26 13:28:33 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/Aligned32.java Fri Feb 26 13:28:33 CET 2010 |
| @@ -0,0 +1,204 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Medium space and speed trade off. No value crosses block boundaries. |
| + * The maximum number of bits/value is 32. |
| + * Use {@link Aligned64} for higher numbers. |
| + * </p><p> |
| + * The implementation strives to avoid conditionals and expensive operations, |
| + * sacrificing code clarity to achieve better performance. |
| + * </p><p> |
| + * Space is optimally used within the boundaries of alignment, e.g. |
| + * 7 bits/value fits 4 values/block for 32 bit and 7 values/block for 64 bit. |
| + * Bits are packed left-aligned to be bit pattern compatible with other bit |
| + * array implementations where possible. |
| + */ |
| +class Aligned32 extends PackedInts.ReaderImpl |
| + implements PackedInts.Mutable { |
| + static final int BLOCK_SIZE = 32; // 32 = int, 64 = long |
| + |
| + private static final int ENTRY_SIZE = BLOCK_SIZE + 1; |
| + |
| + /* |
| + * A value is always positioned inside a single block, requiring a |
| + * shift right to position the bits and a mask to extract them. |
| + */ |
| + private static final int[][] SHIFTS = new int[ENTRY_SIZE][ENTRY_SIZE]; |
| + private static final int[] READ_MASKS = new int[ENTRY_SIZE]; |
| + |
| + static { // Generate shifts |
| + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| + int[] currentShifts = SHIFTS[elementBits]; |
| + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| + currentShifts[bitPos] = BLOCK_SIZE - elementBits - bitPos; |
| + READ_MASKS[elementBits] = ~(~0 << elementBits); |
| + } |
| + } |
| + } |
| + |
| + /* |
| + * Setting a value requires clearing the destination bits with a mask, then |
| + * shifting the value to the left and or'ing the two numbers. |
| + */ |
| + private static final int[][] WRITE_MASKS = new int[ENTRY_SIZE][ENTRY_SIZE]; |
| + static { |
| + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| + int elementPosMask = ~(~0 << elementBits); |
| + int[] currentShifts = SHIFTS[elementBits]; |
| + int[] currentMasks = WRITE_MASKS[elementBits]; |
| + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| + currentMasks[bitPos] = ~(elementPosMask |
| + << currentShifts[bitPos]); |
| + } |
| + } |
| + } |
| + |
| + /* The bits */ |
| + private int[] blocks; |
| + |
| + /* Cached values */ |
| + private int valuesPerBlock; |
| + private int[] shifts; |
| + private int readMask; |
| + private int[] writeMasks; |
| + |
| + /** |
| + * Creates an array with the internal structures adjusted for the given |
| + * limits and initialized to 0. |
| + * @param valueCount the number of values. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + */ |
| + public Aligned32(int valueCount, int bitsPerValue) { |
| + super(valueCount, bitsPerValue); |
| + if (bitsPerValue > 32) { |
| + throw new IllegalArgumentException(String.format( |
| + "This array only supports values of 32 bits or less. The " |
| + + "required number of bits was %d. The Aligned64 " |
| + + "implementation allows values with more than 32 bits", |
| + bitsPerValue)); |
| + } |
| + blocks = new int[size(valueCount, bitsPerValue)]; |
| + updateCached(); |
| + } |
| + |
| + private static int size(int valueCount, int bitsPerValue) { |
| + int valuesPerBlock = BLOCK_SIZE / bitsPerValue; |
| + return valueCount == 0 ? 0 : (valueCount-1) / valuesPerBlock + 1; |
| + } |
| + |
| + /** |
| + * Creates an array with content retrieved from the given IndexInput. |
| + * @param in an IndexInput, positioned at the start of Packed64-content. |
| + * @param valueCount the number of elements. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * @throws java.io.IOException if the values for the backing array could not |
| + * be retrieved. |
| + */ |
| + public Aligned32(IndexInput in, int valueCount, int bitsPerValue) |
| + throws IOException { |
| + super(valueCount, bitsPerValue); |
| + int size = size(valueCount, bitsPerValue); |
| + blocks = new int[size]; |
| + for(int i = 0 ; i < size ; i++) { |
| + blocks[i] = in.readInt(); |
| +// System.out.println("Reading @bit32: " + Integer.toBinaryString((blocks[i])) + " (" + blocks[i] + ")"); |
| + } |
| + if (size % 2 == 1) { |
| + in.readInt(); // Align to long |
| + } |
| + in.readLong(); // Packed compatibility |
| + updateCached(); |
| + } |
| + |
| + |
| + /** |
| + * Creates an array backed by the given blocks. |
| + * </p><p> |
| + * Note: The blocks are used directly, so changes to the given block will |
| + * affect the Packed32-structure. |
| + * @param blocks used as the internal backing array. |
| + * @param valueCount the number of values. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + */ |
| + public Aligned32(int[] blocks, int valueCount, int bitsPerValue) { |
| + // TODO: Check that blocks.length is sufficient for holding length values |
| + super(valueCount, bitsPerValue); |
| + this.blocks = blocks; |
| + updateCached(); |
| + } |
| + |
| + private void updateCached() { |
| + valuesPerBlock = BLOCK_SIZE / bitsPerValue; |
| + shifts = SHIFTS[bitsPerValue]; |
| + readMask = READ_MASKS[bitsPerValue]; |
| + writeMasks = WRITE_MASKS[bitsPerValue]; |
| + } |
| + |
| + /** |
| + * @param index the position of the value. |
| + * @return the value at the given index. |
| + */ |
| + public long get(final int index) { |
| + final int blockPos = index / valuesPerBlock; |
| + final int bitPos = (index - (blockPos * valuesPerBlock)) * bitsPerValue; |
| + |
| + return (blocks[blockPos] >>> shifts[bitPos]) & readMask; |
| + } |
| + |
| + public void set(final int index, final long value) { |
| + final int intValue = (int)value; |
| + |
| + final int blockPos = index / valuesPerBlock; |
| + final int bitPos = (index - (blockPos * valuesPerBlock)) * bitsPerValue; |
| + |
| + blocks[blockPos] = (blocks[blockPos] & writeMasks[bitPos]) |
| + | (intValue << shifts[bitPos]); |
| + } |
| + |
| + public void clear() { |
| + Arrays.fill(blocks, 0); |
| + } |
| + |
| + public long ramBytesUsed() { |
| + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER |
| + + blocks.length * RamUsageEstimator.NUM_BYTES_INT; |
| + } |
| + |
| + public String toString() { |
| + return "Aligned32(" + valueCount + " values at " |
| + + bitsPerValue + " bits/value)"; |
| + } |
| + |
| + /** |
| + * The backing array contains the bits for the values in this structure. |
| + * The array is returned directly, so any changes will be reflected both ways. |
| + * Expert use only. |
| + * @return the backing array. |
| + */ |
| + int[] getBackingArray() { |
| + return blocks; |
| + } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/store/IndexInput.java |
| =================================================================== |
| --- src/java/org/apache/lucene/store/IndexInput.java (revision 895342) |
| +++ src/java/org/apache/lucene/store/IndexInput.java Tue Feb 23 11:19:38 CET 2010 |
| @@ -64,6 +64,13 @@ |
| readBytes(b, offset, len); |
| } |
| |
| + /** Reads two bytes, most significant byte first, and returns them as a short. |
| +  * @see IndexOutput#writeByte(byte) */ |
| + public short readShort() throws IOException { |
| +   final int highByte = readByte() & 0xFF; // the first byte holds the upper 8 bits |
| +   return (short) ((highByte << 8) | (readByte() & 0xFF)); |
| + } |
| + |
| /** Reads four bytes and returns an int. |
| * @see IndexOutput#writeInt(int) |
| */ |
| Index: src/java/org/apache/lucene/util/RamUsageEstimator.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/RamUsageEstimator.java (revision 901710) |
| +++ src/java/org/apache/lucene/util/RamUsageEstimator.java Fri Jan 22 13:01:30 CET 2010 |
| @@ -35,6 +35,16 @@ |
| * estimate is complete. |
| */ |
| public final class RamUsageEstimator { |
| + |
| + public final static int NUM_BYTES_SHORT = 2; // sizes in bytes; final again, as before they were moved here |
| + public final static int NUM_BYTES_INT = 4; |
| + public final static int NUM_BYTES_LONG = 8; |
| + public final static int NUM_BYTES_FLOAT = 4; |
| + public final static int NUM_BYTES_DOUBLE = 8; |
| + public final static int NUM_BYTES_OBJ_HEADER = 8; |
| + public final static int NUM_BYTES_OBJ_REF = Constants.JRE_IS_64BIT ? 8 : 4; // same expression as NUM_BYTES_OBJECT_REF |
| + public final static int NUM_BYTES_ARRAY_HEADER = NUM_BYTES_OBJ_HEADER + NUM_BYTES_INT + NUM_BYTES_OBJ_REF; |
| + |
| private MemoryModel memoryModel; |
| |
| private final Map<Object,Object> seen; |
| @@ -45,11 +55,6 @@ |
| |
| public final static int NUM_BYTES_OBJECT_REF = Constants.JRE_IS_64BIT ? 8 : 4; |
| public final static int NUM_BYTES_CHAR = 2; |
| - public final static int NUM_BYTES_SHORT = 2; |
| - public final static int NUM_BYTES_INT = 4; |
| - public final static int NUM_BYTES_LONG = 8; |
| - public final static int NUM_BYTES_FLOAT = 4; |
| - public final static int NUM_BYTES_DOUBLE = 8; |
| |
| private boolean checkInterned; |
| |
| Index: src/java/org/apache/lucene/util/packed/Direct8.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/Direct8.java Mon Feb 22 08:42:35 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/Direct8.java Mon Feb 22 08:42:35 CET 2010 |
| @@ -0,0 +1,86 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Direct mapping of 8 bit values onto a backing array with one byte per value. |
| + */ |
| +class Direct8 extends PackedInts.ReaderImpl |
| +        implements PackedInts.Mutable { |
| +  private static final int BITS_PER_VALUE = 8; |
| +  private byte[] blocks; |
| + |
| +  /** Creates a structure where all valueCount values are 0. */ |
| +  public Direct8(int valueCount) { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    blocks = new byte[valueCount]; |
| +  } |
| + |
| +  /** |
| +   * Reads valueCount values, one byte each, from in and then skips bytes until |
| +   * the number of bytes consumed is a multiple of 8. |
| +   */ |
| +  public Direct8(IndexInput in, int valueCount) |
| +          throws IOException { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    final byte[] values = new byte[valueCount]; |
| +    for (int pos = 0 ; pos != valueCount ; pos++) { |
| +      values[pos] = in.readByte(); |
| +    } |
| +    // Round out to a whole long |
| +    for (int padding = (8 - valueCount % 8) % 8 ; padding > 0 ; padding--) { |
| +      in.readByte(); |
| +    } |
| +    blocks = values; |
| +  } |
| + |
| +  /** |
| +   * Creates a structure backed by the given blocks. The array is used |
| +   * directly, not copied, so changes to it will show through in this structure. |
| +   * @param blocks used as the internal backing array. |
| +   */ |
| +  public Direct8(byte[] blocks) { |
| +    super(blocks.length, BITS_PER_VALUE); |
| +    this.blocks = blocks; |
| +  } |
| + |
| +  /** @return the byte at index as an unsigned value (0-255). */ |
| +  public long get(final int index) { |
| +    return blocks[index] & 0xFFL; |
| +  } |
| + |
| +  /** Stores the lowest 8 bits of value at index. */ |
| +  public void set(final int index, final long value) { |
| +    blocks[index] = (byte) (0xFF & value); |
| +  } |
| + |
| +  public long ramBytesUsed() { |
| +    return blocks.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; |
| +  } |
| + |
| +  public void clear() { |
| +    Arrays.fill(blocks, (byte) 0); |
| +  } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/ConsumesRAM.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/ConsumesRAM.java Fri Jan 22 12:58:35 CET 2010 |
| +++ src/java/org/apache/lucene/util/ConsumesRAM.java Fri Jan 22 12:58:35 CET 2010 |
| @@ -0,0 +1,22 @@ |
| +package org.apache.lucene.util; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +/** Implemented by structures that can report how much RAM they occupy. */ |
| +public interface ConsumesRAM { |
| +  long ramBytesUsed(); // the memory used, in bytes |
| +} |
| Index: src/java/org/apache/lucene/util/packed/PackedIntsPerformance.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedIntsPerformance.java Fri Feb 26 15:35:48 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedIntsPerformance.java Fri Feb 26 15:35:48 CET 2010 |
| @@ -0,0 +1,141 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.StringWriter; |
| +import java.util.*; |
| + |
| +/** |
| + * Simple performance test of PackedInts: times random gets and prints tables. |
| + */ |
| +public class PackedIntsPerformance { |
| + public static void main(String[] args) { |
| + long startTime = System.currentTimeMillis(); |
| + new PackedIntsPerformance().testSpeed(); |
| + System.out.println( |
| + "\nTotal execution time: " |
| + + (System.currentTimeMillis() - startTime) / 1000 + " seconds"); |
| + } |
| + |
| + private int tests = 0; // rows measured since the last call to mean() |
| + private Map<String, Long> ms = new LinkedHashMap<String, Long>(); // class name -> summed ms |
| + /** Measures all combinations below; prints means and a new header when the implementations change. */ |
| + public void testSpeed() { |
| + final int RUN_COUNT = 3; |
| + final int SEED = 87; |
| + final int[] VALUE_COUNTS = new int[]{ |
| + 1000, 1000*1000, 10*1000*1000}; |
| + final int[] BITS_PER_VALUE = new int[]{ |
| + 1, 3, 4, 7, 8, 9, 15, 16, 17, 28, 31, 32, 33, 47, 49, 63}; |
| + final int[] GET_COUNT = new int[]{10*1000*1000}; |
| + String BASE_HEADER = String.format("%12s%12s%12s", |
| + "bitsPerValue", "valueCount", "getCount"); |
| + |
| + String oldHeader = null; |
| + |
| + for (int bitsPerValue: BITS_PER_VALUE) { |
| + for (int valueCount: VALUE_COUNTS) { |
| + for (int getCount: GET_COUNT) { |
| + List<PackedInts.Mutable> packedInts = |
| + createPackedInts(valueCount, bitsPerValue); |
| + String header = BASE_HEADER; |
| + for (PackedInts.Mutable packedInt: packedInts) { |
| + header += String.format( |
| + "%12s", packedInt.getClass().getSimpleName()); |
| + } |
| + if (!header.equals(oldHeader)) { |
| + mean(); |
| + System.out.println("\n" + header); |
| + oldHeader = header; |
| + } |
| + measureSpeed( |
| + packedInts, valueCount, bitsPerValue, getCount, |
| + RUN_COUNT, SEED); |
| + } |
| + } |
| + } |
| + mean(); |
| + } |
| + /** Prints the summed time per implementation divided by tests, then resets both. */ |
| + private void mean() { |
| + if (ms.size() != 0) { |
| + System.out.print("Mean: "); |
| + for (Map.Entry<String, Long> entry: ms.entrySet()) { |
| + System.out.print(String.format( |
| + "%12s", entry.getValue() / tests)); |
| + } |
| + System.out.println(""); |
| + } |
| + ms.clear(); |
| + tests = 0; |
| + } |
| + /** Prints one row: for each implementation, the fastest of runCount runs in whole ms. */ |
| + private void measureSpeed( |
| + List<? extends PackedInts.Reader> packedInts, |
| + int valueCount, int bitsPerValue, |
| + int getCount, int runCount, int seed) { |
| + tests++; |
| + StringWriter sw = new StringWriter(1000); |
| + sw.append(String.format("%12d%12d%12s", |
| + bitsPerValue, valueCount, getCount)); |
| + |
| + for (PackedInts.Reader packedInt: packedInts) { |
| + long minTime = Long.MAX_VALUE; |
| + for (int run = 0 ; run < runCount ; run++) { |
| + Random random = new Random(seed); // same seed: every run requests the same indexes |
| + long startTime = System.nanoTime(); // the timed region includes random.nextInt |
| + for (int get = 0 ; get < getCount ; get++) { |
| + packedInt.get(random.nextInt(valueCount)); // NOTE(review): result unused - confirm the JIT cannot skip the read |
| + } |
| + minTime = Math.min(minTime, System.nanoTime() - startTime); |
| + } |
| + String key = packedInt.getClass().getSimpleName(); |
| + ms.put(key, ms.containsKey(key) ? ms.get(key) + minTime / 1000000 : |
| + minTime / 1000000); |
| + sw.append(String.format("%12d", minTime / 1000000)); |
| + } |
| + System.out.println(sw.toString()); |
| + } |
| + |
| + // Copy-paste from TestPackedInts: the implementations to measure for the given bitsPerValue |
| + private static List<PackedInts.Mutable> createPackedInts( |
| + int valueCount, int bitsPerValue) { |
| + List<PackedInts.Mutable> packedInts = new ArrayList<PackedInts.Mutable>(); |
| + if (bitsPerValue <= 8) { |
| + packedInts.add(new Direct8(valueCount)); |
| + } |
| + if (bitsPerValue <= 16) { |
| + packedInts.add(new Direct16(valueCount)); |
| + } |
| + if (bitsPerValue <= 31) { // NOTE(review): 32 is left out although Aligned32 only rejects more than 32 - confirm intent |
| + packedInts.add(new Packed32(valueCount, bitsPerValue)); |
| + packedInts.add(new Aligned32(valueCount, bitsPerValue)); |
| + } |
| + if (bitsPerValue <= 32) { |
| + packedInts.add(new Direct32(valueCount)); |
| + } |
| + if (bitsPerValue <= 63) { |
| + packedInts.add(new Packed64(valueCount, bitsPerValue)); |
| + packedInts.add(new Aligned64(valueCount, bitsPerValue)); |
| + } |
| + packedInts.add(new Direct64(valueCount)); |
| + return packedInts; |
| + } |
| + |
| + |
| +} |
| Index: src/java/org/apache/lucene/util/packed/TODO |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/TODO Fri Feb 26 16:02:39 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/TODO Fri Feb 26 16:02:39 CET 2010 |
| @@ -0,0 +1,367 @@ |
| +- Test whether aligned is always faster than packed |
| +Aligned uses more logic (mainly a division), but packed reads two ints/longs |
| +from RAM for each lookup. If the extra logic always makes aligned slower, we |
| +should avoid using aligned at all. This would also make the persistent structure |
| +consistent between the remaining implementations (packed and direct). |
| + |
| +- Better JavaDocs (as always) |
| + |
| + |
| + |
| +******************************************************************************** |
| +Run performance tests with |
| +java -cp lucene-core-3.1-dev.jar org.apache.lucene.util.packed.PackedIntsPerformance |
| +******************************************************************************** |
| + |
| + |
| +******************************************************************************** |
| +testSpeed result by Toke Eskildsen (te@statsbiblioteket.dk) 2010-02-26 |
| +Java 1.6.0_15-b03 64bit Server, default settings, Linux |
| +Dell Precision M6500: Intel i7 Q 820 @ 1.73GHz, 8 MB cache, |
| + dual-channel PC 1333 RAM |
| +******************************************************************************** |
| +bitsPerValue valueCount getCount Direct8 Direct16 Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 1 1000 10000000 129 131 230 223 154 245 225 154 |
| + 1 1000000 10000000 215 218 246 221 229 242 219 326 |
| + 1 10000000 10000000 344 459 264 263 500 265 260 536 |
| + 3 1000 10000000 161 157 251 228 154 247 225 156 |
| + 3 1000000 10000000 210 217 257 244 225 255 239 321 |
| + 3 10000000 10000000 353 457 274 283 747 277 275 531 |
| + 4 1000 10000000 160 157 252 227 154 248 225 156 |
| + 4 1000000 10000000 211 216 263 258 225 260 246 293 |
| + 4 10000000 10000000 351 460 283 290 495 283 283 533 |
| + 7 1000 10000000 161 157 254 228 154 250 225 157 |
| + 7 1000000 10000000 212 216 265 262 224 267 257 292 |
| + 7 10000000 10000000 352 459 359 403 496 360 371 534 |
| + 8 1000 10000000 160 157 252 227 154 248 225 155 |
| + 8 1000000 10000000 210 215 266 261 243 265 259 288 |
| + 8 10000000 10000000 351 456 390 400 596 389 394 532 |
| +Mean: 238 275 273 267 316 273 261 330 |
| + |
| +bitsPerValue valueCount getCount Direct16 Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 9 1000 10000000 157 252 228 154 251 224 155 |
| + 9 1000000 10000000 216 266 264 224 268 259 295 |
| + 9 10000000 10000000 459 413 454 495 413 423 535 |
| + 15 1000 10000000 157 251 228 154 250 224 155 |
| + 15 1000000 10000000 215 266 266 225 269 263 304 |
| + 15 10000000 10000000 457 484 499 495 488 499 533 |
| + 16 1000 10000000 158 252 227 155 249 295 157 |
| + 16 1000000 10000000 217 268 269 221 268 263 294 |
| + 16 10000000 10000000 456 490 499 494 686 497 533 |
| +Mean: 276 326 326 290 349 327 329 |
| + |
| +bitsPerValue valueCount getCount Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 17 1000 10000000 255 227 154 247 225 155 |
| + 17 1000000 10000000 267 275 228 267 267 294 |
| + 17 10000000 10000000 500 550 500 504 526 533 |
| + 28 1000 10000000 253 227 154 250 226 155 |
| + 28 1000000 10000000 273 279 224 272 280 292 |
| + 28 10000000 10000000 536 552 501 541 550 535 |
| + 31 1000 10000000 255 230 154 250 226 162 |
| + 31 1000000 10000000 283 277 228 273 279 309 |
| + 31 10000000 10000000 544 549 498 544 550 534 |
| +Mean: 351 351 293 349 347 329 |
| + |
| +bitsPerValue valueCount getCount Direct32 Packed64 Aligned64 Direct64 |
| + 32 1000 10000000 155 249 226 156 |
| + 32 1000000 10000000 226 278 277 292 |
| + 32 10000000 10000000 499 548 553 533 |
| +Mean: 293 358 352 327 |
| + |
| +bitsPerValue valueCount getCount Packed64 Aligned64 Direct64 |
| + 33 1000 10000000 250 226 156 |
| + 33 1000000 10000000 273 340 286 |
| + 33 10000000 10000000 551 584 533 |
| + 47 1000 10000000 250 226 157 |
| + 47 1000000 10000000 294 340 286 |
| + 47 10000000 10000000 564 582 535 |
| + 49 1000 10000000 250 228 156 |
| + 49 1000000 10000000 292 349 292 |
| + 49 10000000 10000000 568 585 533 |
| + 63 1000 10000000 249 227 156 |
| + 63 1000000 10000000 331 355 319 |
| + 63 10000000 10000000 581 584 535 |
| +Mean: 371 385 328 |
| + |
| +Total execution time: 271 seconds |
| + |
| +******************************************************************************** |
| +testSpeed result by Toke Eskildsen (te@statsbiblioteket.dk) 2010-02-26 |
| +Java 1.6.0_15-b03 64bit Server, default settings, Linux |
| +Server ps3: Intel Xeon L5420 @ 2.50GHz, 6 MB cache |
| +******************************************************************************** |
| +bitsPerValue valueCount getCount Direct8 Direct16 Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 1 1000 10000000 288 309 398 425 365 421 421 392 |
| + 1 1000000 10000000 364 371 419 440 389 417 440 636 |
| + 1 10000000 10000000 749 1097 386 382 1259 385 378 1351 |
| + 3 1000 10000000 305 300 357 340 298 369 338 298 |
| + 3 1000000 10000000 308 325 376 372 331 375 367 578 |
| + 3 10000000 10000000 757 1097 399 405 1261 396 398 1354 |
| + 4 1000 10000000 305 300 357 340 298 369 338 298 |
| + 4 1000000 10000000 309 325 376 373 329 385 367 579 |
| + 4 10000000 10000000 758 1097 424 418 1259 413 422 1351 |
| + 7 1000 10000000 305 300 370 340 298 369 337 298 |
| + 7 1000000 10000000 309 325 379 376 329 379 371 583 |
| + 7 10000000 10000000 758 1098 731 828 1259 747 738 1351 |
| + 8 1000 10000000 305 300 357 340 298 369 337 298 |
| + 8 1000000 10000000 308 325 378 376 326 384 373 579 |
| + 8 10000000 10000000 758 1098 830 829 1260 847 828 1352 |
| +Mean: 459 577 435 438 637 441 430 753 |
| + |
| +bitsPerValue valueCount getCount Direct16 Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 9 1000 10000000 300 357 340 298 368 337 298 |
| + 9 1000000 10000000 325 381 383 329 379 375 581 |
| + 9 10000000 10000000 1097 907 1001 1262 925 913 1354 |
| + 15 1000 10000000 301 364 340 298 369 337 298 |
| + 15 1000000 10000000 325 387 390 327 386 386 581 |
| + 15 10000000 10000000 1097 1149 1174 1262 1172 1173 1354 |
| + 16 1000 10000000 300 357 340 298 369 356 298 |
| + 16 1000000 10000000 325 387 390 328 387 386 583 |
| + 16 10000000 10000000 1096 1172 1173 1261 1193 1172 1354 |
| +Mean: 574 606 614 629 616 603 744 |
| + |
| +bitsPerValue valueCount getCount Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 17 1000 10000000 357 349 298 368 337 298 |
| + 17 1000000 10000000 388 392 329 388 391 581 |
| + 17 10000000 10000000 1192 1323 1261 1214 1261 1356 |
| + 28 1000 10000000 364 349 298 379 337 298 |
| + 28 1000000 10000000 395 377 328 395 398 578 |
| + 28 10000000 10000000 1320 1323 1262 1344 1349 1354 |
| + 31 1000 10000000 364 349 298 369 337 298 |
| + 31 1000000 10000000 396 379 436 396 398 577 |
| + 31 10000000 10000000 1339 1322 1262 1362 1348 1355 |
| +Mean: 679 684 641 690 684 743 |
| + |
| +bitsPerValue valueCount getCount Direct32 Packed64 Aligned64 Direct64 |
| + 32 1000 10000000 298 369 337 298 |
| + 32 1000000 10000000 327 401 401 579 |
| + 32 10000000 10000000 1262 1367 1348 1354 |
| +Mean: 629 712 695 743 |
| + |
| +bitsPerValue valueCount getCount Packed64 Aligned64 Direct64 |
| + 33 1000 10000000 369 342 298 |
| + 33 1000000 10000000 403 637 579 |
| + 33 10000000 10000000 1373 1416 1354 |
| + 47 1000 10000000 369 342 298 |
| + 47 1000000 10000000 472 636 576 |
| + 47 10000000 10000000 1421 1415 1351 |
| + 49 1000 10000000 369 342 298 |
| + 49 1000000 10000000 490 635 578 |
| + 49 10000000 10000000 1426 1414 1352 |
| + 63 1000 10000000 369 342 298 |
| + 63 1000000 10000000 662 642 580 |
| + 63 10000000 10000000 1454 1415 1354 |
| +Mean: 764 798 743 |
| + |
| +Total execution time: 530 seconds |
| + |
| +******************************************************************************** |
| +testSpeed result by Toke Eskildsen (te@statsbiblioteket.dk) 2010-02-26 |
| +Java 1.6.0_15-b03 64bit Server, default settings, Linux |
| +Workstation pc286: Intel Core 2 E6550 @ 2.33GHz, 4 MB cache |
| +******************************************************************************** |
| +bitsPerValue valueCount getCount Direct8 Direct16 Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 1 1000 10000000 361 374 483 529 437 490 521 436 |
| + 1 1000000 10000000 411 420 473 549 519 471 545 924 |
| + 1 10000000 10000000 934 1136 403 453 1245 405 447 1337 |
| + 3 1000 10000000 333 328 396 394 336 395 391 336 |
| + 3 1000000 10000000 333 354 395 441 446 396 432 848 |
| + 3 10000000 10000000 949 1156 487 570 1267 514 560 1336 |
| + 4 1000 10000000 333 328 394 392 334 396 389 334 |
| + 4 1000000 10000000 335 354 399 442 452 400 433 847 |
| + 4 10000000 10000000 950 1156 664 704 1267 654 700 1334 |
| + 7 1000 10000000 333 328 405 392 334 401 389 334 |
| + 7 1000000 10000000 335 355 404 444 453 397 435 846 |
| + 7 10000000 10000000 947 1156 963 1088 1268 976 1031 1335 |
| + 8 1000 10000000 334 330 394 392 334 390 389 334 |
| + 8 1000000 10000000 335 355 400 444 445 490 434 848 |
| + 8 10000000 10000000 948 1155 1022 1089 1267 1035 1082 1335 |
| +Mean: 544 619 512 554 693 520 545 850 |
| + |
| +bitsPerValue valueCount getCount Direct16 Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 9 1000 10000000 330 394 392 334 391 389 334 |
| + 9 1000000 10000000 354 419 451 451 400 438 844 |
| + 9 10000000 10000000 1155 1064 1192 1267 1079 1136 1335 |
| + 15 1000 10000000 328 394 392 334 390 389 334 |
| + 15 1000000 10000000 355 416 459 448 411 454 847 |
| + 15 10000000 10000000 1156 1209 1299 1267 1221 1295 1335 |
| + 16 1000 10000000 330 394 392 334 391 389 334 |
| + 16 1000000 10000000 368 411 459 449 413 454 846 |
| + 16 10000000 10000000 1156 1222 1356 1383 1235 1297 1336 |
| +Mean: 614 658 710 696 659 693 838 |
| + |
| +bitsPerValue valueCount getCount Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 17 1000 10000000 394 392 334 391 389 334 |
| + 17 1000000 10000000 411 570 450 413 477 846 |
| + 17 10000000 10000000 1233 1401 1267 1246 1351 1334 |
| + 28 1000 10000000 394 392 334 390 389 334 |
| + 28 1000000 10000000 477 587 448 468 563 847 |
| + 28 10000000 10000000 1308 1400 1267 1319 1408 1335 |
| + 31 1000 10000000 394 392 334 397 389 334 |
| + 31 1000000 10000000 501 576 456 514 564 848 |
| + 31 10000000 10000000 1320 1401 1268 1331 1407 1335 |
| +Mean: 714 790 684 718 770 838 |
| + |
| +bitsPerValue valueCount getCount Direct32 Packed64 Aligned64 Direct64 |
| + 32 1000 10000000 334 391 389 334 |
| + 32 1000000 10000000 456 572 565 849 |
| + 32 10000000 10000000 1267 1334 1407 1335 |
| +Mean: 685 765 787 839 |
| + |
| +bitsPerValue valueCount getCount Packed64 Aligned64 Direct64 |
| + 33 1000 10000000 391 389 334 |
| + 33 1000000 10000000 533 976 845 |
| + 33 10000000 10000000 1336 1467 1335 |
| + 47 1000 10000000 391 389 334 |
| + 47 1000000 10000000 759 975 846 |
| + 47 10000000 10000000 1368 1473 1338 |
| + 49 1000 10000000 390 389 334 |
| + 49 1000000 10000000 781 974 848 |
| + 49 10000000 10000000 1372 1475 1335 |
| + 63 1000 10000000 391 389 334 |
| + 63 1000000 10000000 925 975 846 |
| + 63 10000000 10000000 1392 1475 1334 |
| +Mean: 835 945 838 |
| + |
| +Total execution time: 598 seconds |
| + |
| +******************************************************************************** |
| +testSpeed result by Toke Eskildsen (te@statsbiblioteket.dk) 2010-02-26 |
| +Java 1.6.0_03-b05 64bit Server, default settings, Linux |
| +Server metis: Intel Xeon 5148 @ 2.33GHz, 4 MB cache |
| +******************************************************************************** |
| +bitsPerValue valueCount getCount Direct8 Direct16 Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 1 1000 10000000 404 410 527 574 480 539 573 480 |
| + 1 1000000 10000000 451 468 520 596 563 520 592 989 |
| + 1 10000000 10000000 1073 1326 530 610 1435 528 611 1523 |
| + 3 1000 10000000 474 474 541 576 473 540 570 469 |
| + 3 1000000 10000000 445 460 519 598 584 520 600 984 |
| + 3 10000000 10000000 1098 1323 602 721 1439 626 697 1518 |
| + 4 1000 10000000 473 473 540 575 474 541 571 470 |
| + 4 1000000 10000000 445 461 518 600 554 522 601 985 |
| + 4 10000000 10000000 1100 1327 785 839 1443 765 853 1525 |
| + 7 1000 10000000 474 474 542 577 475 543 572 471 |
| + 7 1000000 10000000 446 463 519 602 556 522 601 985 |
| + 7 10000000 10000000 1104 1329 1123 1261 1442 1144 1206 1523 |
| + 8 1000 10000000 474 474 541 575 473 540 570 469 |
| + 8 1000000 10000000 444 460 522 603 546 522 603 981 |
| + 8 10000000 10000000 1099 1326 1184 1260 1436 1203 1258 1517 |
| +Mean: 666 749 634 704 824 638 698 992 |
| + |
| +bitsPerValue valueCount getCount Direct16 Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 9 1000 10000000 474 540 575 473 540 566 470 |
| + 9 1000000 10000000 461 522 610 562 524 603 980 |
| + 9 10000000 10000000 1323 1234 1374 1438 1249 1312 1523 |
| + 15 1000 10000000 474 541 576 474 542 567 476 |
| + 15 1000000 10000000 460 540 619 584 532 621 984 |
| + 15 10000000 10000000 1330 1396 1494 1441 1411 1490 1525 |
| + 16 1000 10000000 475 541 576 474 541 566 470 |
| + 16 1000000 10000000 467 545 619 541 536 615 982 |
| + 16 10000000 10000000 1324 1405 1490 1438 1420 1483 1519 |
| +Mean: 754 807 881 825 810 869 992 |
| + |
| +bitsPerValue valueCount getCount Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 17 1000 10000000 541 576 474 542 565 469 |
| + 17 1000000 10000000 542 710 592 537 623 984 |
| + 17 10000000 10000000 1423 1612 1444 1440 1546 1524 |
| + 28 1000 10000000 542 577 475 542 567 470 |
| + 28 1000000 10000000 581 715 573 588 739 985 |
| + 28 10000000 10000000 1507 1607 1440 1520 1607 1518 |
| + 31 1000 10000000 541 576 474 543 566 471 |
| + 31 1000000 10000000 608 708 585 624 722 983 |
| + 31 10000000 10000000 1516 1610 1443 1532 1603 1525 |
| +Mean: 866 965 833 874 948 992 |
| + |
| +bitsPerValue valueCount getCount Direct32 Packed64 Aligned64 Direct64 |
| + 32 1000 10000000 475 542 568 471 |
| + 32 1000000 10000000 540 640 740 982 |
| + 32 10000000 10000000 1443 1538 1604 1522 |
| +Mean: 819 906 970 991 |
| + |
| +bitsPerValue valueCount getCount Packed64 Aligned64 Direct64 |
| + 33 1000 10000000 541 566 471 |
| + 33 1000000 10000000 635 1145 980 |
| + 33 10000000 10000000 1536 1670 1516 |
| + 47 1000 10000000 540 565 471 |
| + 47 1000000 10000000 883 1142 987 |
| + 47 10000000 10000000 1566 1664 1522 |
| + 49 1000 10000000 541 567 470 |
| + 49 1000000 10000000 915 1140 1067 |
| + 49 10000000 10000000 1571 1666 1520 |
| + 63 1000 10000000 542 568 471 |
| + 63 1000000 10000000 1084 1143 983 |
| + 63 10000000 10000000 1597 1671 1525 |
| +Mean: 995 1125 998 |
| + |
| +Total execution time: 726 seconds |
| + |
| + |
| +******************************************************************************** |
| +testSpeed result by Toke Eskildsen (te@statsbiblioteket.dk) 2010-02-26 |
| +Java 1.6.0_07-b06 64bit Server, default settings, Linux |
| +Server debit: Intel Xeon MP CPU @ 3.16GHz, 1 MB cache |
| +******************************************************************************** |
| +bitsPerValue valueCount getCount Direct8 Direct16 Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 1 1000 10000000 428 426 496 752 475 546 748 475 |
| + 1 1000000 10000000 932 1609 557 752 2090 579 747 2346 |
| + 1 10000000 10000000 2384 2526 1205 1375 2545 1189 1286 2726 |
| + 3 1000 10000000 469 469 529 736 480 535 726 486 |
| + 3 1000000 10000000 842 1589 599 785 2017 624 774 2294 |
| + 3 10000000 10000000 2417 2512 2222 2324 2548 2348 2307 2719 |
| + 4 1000 10000000 469 469 528 734 484 534 725 487 |
| + 4 1000000 10000000 853 1590 640 808 2022 647 795 2300 |
| + 4 10000000 10000000 2410 2517 2365 2429 2551 2509 2441 2720 |
| + 7 1000 10000000 469 469 528 734 483 534 724 488 |
| + 7 1000000 10000000 865 1594 850 1171 2017 1024 1082 2294 |
| + 7 10000000 10000000 2419 2513 2558 2666 2554 2723 2641 2713 |
| + 8 1000 10000000 469 470 529 735 484 535 726 496 |
| + 8 1000000 10000000 842 1572 1017 1095 2034 1032 1134 2302 |
| + 8 10000000 10000000 2440 2512 2619 2661 2555 2755 2663 2734 |
| +Mean: 1247 1522 1149 1317 1689 1207 1301 1838 |
| + |
| +bitsPerValue valueCount getCount Direct16 Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 9 1000 10000000 470 528 735 485 537 727 491 |
| + 9 1000000 10000000 1567 1052 1380 2024 1066 1204 2277 |
| + 9 10000000 10000000 2509 2635 2715 2551 2798 2685 2734 |
| + 15 1000 10000000 470 530 735 494 536 726 492 |
| + 15 1000000 10000000 1570 1658 1840 2037 1745 1859 2290 |
| + 15 10000000 10000000 2523 2715 2770 2545 2864 2769 2721 |
| + 16 1000 10000000 470 529 735 483 534 726 495 |
| + 16 1000000 10000000 1581 1730 1852 2044 1813 1857 2288 |
| + 16 10000000 10000000 2515 2724 2771 2547 2886 2775 2723 |
| +Mean: 1519 1566 1725 1690 1642 1703 1834 |
| + |
| +bitsPerValue valueCount getCount Packed32 Aligned32 Direct32 Packed64 Aligned64 Direct64 |
| + 17 1000 10000000 528 736 485 537 728 495 |
| + 17 1000000 10000000 1770 2339 2017 1886 2107 2300 |
| + 17 10000000 10000000 2731 2888 2549 2900 2799 2693 |
| + 28 1000 10000000 528 734 481 535 726 487 |
| + 28 1000000 10000000 2190 2353 2037 2310 2354 2297 |
| + 28 10000000 10000000 2783 2848 2534 2956 2863 2723 |
| + 31 1000 10000000 527 734 492 535 725 488 |
| + 31 1000000 10000000 2230 2347 2020 2361 2340 2276 |
| + 31 10000000 10000000 2813 2865 2547 2957 2858 2720 |
| +Mean: 1788 1982 1684 1886 1944 1831 |
| + |
| +bitsPerValue valueCount getCount Direct32 Packed64 Aligned64 Direct64 |
| + 32 1000 10000000 482 536 727 487 |
| + 32 1000000 10000000 2040 2406 2353 2297 |
| + 32 10000000 10000000 2565 2972 2882 2722 |
| +Mean: 1695 1971 1987 1835 |
| + |
| +bitsPerValue valueCount getCount Packed64 Aligned64 Direct64 |
| + 33 1000 10000000 535 726 494 |
| + 33 1000000 10000000 2430 2608 2297 |
| + 33 10000000 10000000 2990 3042 2714 |
| + 47 1000 10000000 535 725 494 |
| + 47 1000000 10000000 2573 2601 2284 |
| + 47 10000000 10000000 3080 3032 2730 |
| + 49 1000 10000000 536 726 493 |
| + 49 1000000 10000000 2609 2620 2303 |
| + 49 10000000 10000000 3067 3029 2706 |
| + 63 1000 10000000 535 724 488 |
| + 63 1000000 10000000 2687 2598 2294 |
| + 63 10000000 10000000 3153 3048 2726 |
| +Mean: 2060 2123 1835 |
| + |
| +Total execution time: 1418 seconds |
| Index: src/java/org/apache/lucene/util/packed/Aligned64.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/Aligned64.java Fri Feb 26 13:28:17 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/Aligned64.java Fri Feb 26 13:28:17 CET 2010 |
| @@ -0,0 +1,190 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Medium space and speed trade off. No values crosses block boundaries. |
| + * </p><p> |
| + * The implementation strives to avoid conditionals and expensive operations, |
| + * sacrificing code clarity to achieve better performance. |
| + * </p><p> |
| + * Space is optimally used within the boundaries of alignment, e.g. |
| + * 7 bits/value fits 7 values/block for 64 bit. |
| + * Bits are packed left-aligned to be bit pattern compatible with other bit |
| + * array implementations where possible. |
| + */ |
| +class Aligned64 extends PackedInts.ReaderImpl |
| + implements PackedInts.Mutable { |
| + static final int BLOCK_SIZE = 64; // 32 = int, 64 = long |
| + |
| + private static final int ENTRY_SIZE = BLOCK_SIZE + 1; |
| + |
| + /* |
| + * A value is always positioned inside a single block, requiring a |
| + * shift right to position the bits and a mask to extract them. |
| + */ |
| + private static final int[][] SHIFTS = new int[ENTRY_SIZE][ENTRY_SIZE]; |
| + private static final long[] READ_MASKS = new long[ENTRY_SIZE]; |
| + |
| + static { // Generate shifts |
| + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| + int[] currentShifts = SHIFTS[elementBits]; |
| + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| + currentShifts[bitPos] = BLOCK_SIZE - elementBits - bitPos ; |
| +// System.out.println("elementBits=" + elementBits + ", bitPos=" + bitPos + ", shift=" + currentShifts[bitPos]); |
| + READ_MASKS[elementBits] = ~(~0L << elementBits); |
| + } |
| + } |
| + } |
| + |
| + /* |
| + * Setting a value requires clearing the destination bits with a mask, then |
| + * shifting the value to the left and or'ing the two numbers. |
| + */ |
| + private static final long[][] WRITE_MASKS = new long[ENTRY_SIZE][ENTRY_SIZE]; |
| + static { |
| + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| + long elementPosMask = ~(~0L << elementBits); |
| + int[] currentShifts = SHIFTS[elementBits]; |
| + long[] currentMasks = WRITE_MASKS[elementBits]; |
| + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| + currentMasks[bitPos] = ~(elementPosMask << currentShifts[bitPos]); |
| + } |
| + } |
| + } |
| + |
| + /* The bits */ |
| + private long[] blocks; |
| + |
| + /* Cached values */ |
| + private int valuesPerBlock; |
| + private int[] shifts; |
| + private long readMask; |
| + private long[] writeMasks; |
| + |
| + /** |
| + * Creates an array with the internal structures adjusted for the given |
| + * limits and initialized to 0. |
| + * @param valueCount the number of values. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + */ |
| + public Aligned64(int valueCount, int bitsPerValue) { |
| + super(valueCount, bitsPerValue); |
| + blocks = new long[size(valueCount, bitsPerValue)]; |
| + updateCached(); |
| + } |
| + |
| + private static int size(int valueCount, int bitsPerValue) { |
| + int valuesPerBlock = BLOCK_SIZE / bitsPerValue; |
| + return valueCount == 0 ? 0 : (valueCount-1) / valuesPerBlock + 1; |
| + } |
| + |
| + /** |
| + * Creates an array with content retrieved from the given IndexInput. |
| + * @param in an IndexInput, positioned at the start of Packed64-content. |
| + * @param valueCount the number of elements. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * @throws java.io.IOException if the values for the backing array could not |
| + * be retrieved. |
| + */ |
| + public Aligned64(IndexInput in, int valueCount, int bitsPerValue) |
| + throws IOException { |
| + super(valueCount, bitsPerValue); |
| + int size = size(valueCount, bitsPerValue); |
| + blocks = new long[size]; |
| + for(int i = 0 ; i < size ; i++) { |
| + blocks[i] = in.readLong(); |
| +// System.out.println("Reading @bit64: " + Long.toBinaryString((blocks[i])) + " (" + blocks[i] + ")"); |
| + } |
| + in.readLong(); // The extra long if for packed-compatibility |
| + updateCached(); |
| + } |
| + |
| + |
| + /** |
| + * Creates an array backed by the given blocks. |
| + * </p><p> |
| + * Note: The blocks are used directly, so changes to the given block will |
| + * affect the Packed64-structure. |
| + * @param blocks used as the internal backing array. |
| + * @param valueCount the number of values. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + */ |
| + public Aligned64(long[] blocks, int valueCount, int bitsPerValue) { |
| + // TODO: Check that blocks.length is sufficient for holding length values |
| + super(valueCount, bitsPerValue); |
| + this.blocks = blocks; |
| + updateCached(); |
| + } |
| + |
| + private void updateCached() { |
| + valuesPerBlock = BLOCK_SIZE / bitsPerValue; |
| + shifts = SHIFTS[bitsPerValue]; |
| + readMask = READ_MASKS[bitsPerValue]; |
| + writeMasks = WRITE_MASKS[bitsPerValue]; |
| + } |
| + |
| + /** |
| + * @param index the position of the value. |
| + * @return the value at the given index. |
| + */ |
| + public long get(final int index) { |
| + final int blockPos = index / valuesPerBlock; |
| + final int bitPos = (index - (blockPos * valuesPerBlock)) * bitsPerValue; |
| + |
| + return (blocks[blockPos] >>> shifts[bitPos]) & readMask; |
| + } |
| + |
| + public void set(final int index, final long value) { |
| + final int blockPos = index / valuesPerBlock; |
| + final int bitPos = (index - (blockPos * valuesPerBlock)) * bitsPerValue; |
| + |
| + blocks[blockPos] = (blocks[blockPos] & writeMasks[bitPos]) |
| + | (value << shifts[bitPos]); |
| + } |
| + |
| + public void clear() { |
| + Arrays.fill(blocks, 0); |
| + } |
| + |
| + public long ramBytesUsed() { |
| + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER |
| + + blocks.length * RamUsageEstimator.NUM_BYTES_INT; |
| + } |
| + |
| + public String toString() { |
| + return "Aligned64(" + valueCount + " values at " |
| + + bitsPerValue + " bits/value)"; |
| + } |
| + |
| + /** |
| + * The backing array contains the bits for the values in this structure. |
| + * The array is returned directly, so any changes will be reflected both ways. |
| + * Expert use only. |
| + * @return the backing array. |
| + */ |
| + long[] getBackingArray() { |
| + return blocks; |
| + } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/packed/AlignedWriter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/AlignedWriter.java Fri Feb 26 13:08:36 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/AlignedWriter.java Fri Feb 26 13:08:36 CET 2010 |
| @@ -0,0 +1,115 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.store.IndexOutput; |
| + |
| +import java.io.IOException; |
| + |
| +// Packs high order byte first, to match |
| +// IndexOutput.writeInt/Long/Short byte order |
| + |
| +/** |
| + * Generic writer for block-aligned values: Bits for values are stored so |
| + * that block-boundaries are never crossed. For some number of bits, this means |
| + * wasted space in the blocks. |
| + * </p><p> |
| + * The bits for values are stored left-aligned in the blocks, in order to be |
| + * bit-pattern compatible with byte, short, int and long-backed implementations |
| + * as well as packed for 1, 2, 4, 8, 16, 32 and 64 bits/value. |
| + */ |
| +class AlignedWriter extends PackedInts.Writer { |
| + private final PackedInts.BLOCK blockPref; |
| + private long pending = 0; |
| + private int pendingBitPos = 0; |
| + private int written = 0; |
| + private long flushedInts = 0; |
| + |
| + public AlignedWriter(IndexOutput out, int valueCount, |
| + int bitsPerValue, PackedInts.BLOCK blockPref) |
| + throws IOException { |
| + super(out, valueCount, bitsPerValue, |
| + blockPref == PackedInts.BLOCK.bit32 ? |
| + PackedInts.PERSISTENCE.aligned32 : |
| + PackedInts.PERSISTENCE.aligned64); |
| + this.blockPref = blockPref; |
| + } |
| + |
| + @Override |
| + public void add(long value) throws IOException { |
| +// System.out.println("Adding " + value + " to " + this); |
| + |
| + // TODO: Consider caching maxValue and bits/block |
| + assert value <= PackedInts.maxValue(bitsPerValue) : "value=" + value |
| + + " maxValue=" + PackedInts.maxValue(bitsPerValue); |
| + assert value >= 0; |
| + assert written <= valueCount : "The number of values to write has been " + |
| + "exceeded, expected number of values: " + valueCount; |
| + pending |= value << (64 - pendingBitPos - bitsPerValue); |
| + pendingBitPos += bitsPerValue; |
| + if (pendingBitPos > blockPref.getBits() - bitsPerValue) { |
| + flush(); |
| + } |
| + written++; |
| + } |
| + |
| + @Override |
| + public void finish() throws IOException { |
| + while (written < valueCount) { |
| + add(0L); |
| + } |
| +/* assert written == valueCount : |
| + valueCount + " values should be added, but only " + written |
| + + " has been received";*/ |
| + if (pendingBitPos != 0) { // Flush pending |
| + flush(); |
| + } |
| + if (flushedInts % 2 != 0) { // Align to long |
| + out.writeInt(0); |
| + } |
| + out.writeLong(0L); // Dummy last element to be compatible with packed |
| + } |
| + |
| + private void flush() throws IOException { |
| + // TODO: Align to 64 bit |
| + switch (blockPref) { |
| + case bit32: { |
| + out.writeInt((int)(pending >>> 32)); |
| +// System.out.println("Flushing @" + blockPref + ": " + Integer.toBinaryString((int)(pending >>> 32))); |
| + flushedInts++; |
| + break; |
| + } |
| + case bit64: { |
| + out.writeLong(pending); |
| +// System.out.println("Flushing @" + blockPref + ": " + Long.toBinaryString((pending)) + " (" + pending + ")"); |
| + flushedInts += 2; |
| + break; |
| + } |
| + default: throw new UnsupportedOperationException( |
| + "The BLOCK " + blockPref + " is unsupported"); |
| + } |
| + pending = 0; |
| + pendingBitPos = 0; |
| + } |
| + |
| + public String toString() { |
| + return "AlignedWriter" + blockPref.getBits() |
| + + "(written " + written + "/" + valueCount + " with " |
| + + bitsPerValue + " bits/value)"; |
| + } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/packed/Direct16.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/Direct16.java Mon Feb 22 08:42:35 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/Direct16.java Mon Feb 22 08:42:35 CET 2010 |
| @@ -0,0 +1,86 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Direct wrapping of 16 bit values to a backing array of shorts. |
| + */ |
| +class Direct16 extends PackedInts.ReaderImpl |
| + implements PackedInts.Mutable { |
| + private short[] blocks; |
| + private static final int BITS_PER_VALUE = 16; |
| + |
| + public Direct16(int valueCount) { |
| + super(valueCount, BITS_PER_VALUE); |
| + blocks = new short[valueCount]; |
| + } |
| + |
| + public Direct16(IndexInput in, int valueCount) throws IOException { |
| + super(valueCount, BITS_PER_VALUE); |
| + short[] blocks = new short[valueCount]; |
| + for(int i=0;i<valueCount;i++) { |
| + blocks[i] = in.readShort(); |
| + } |
| + final int mod = valueCount % 4; |
| + if (mod != 0) { |
| + final int pad = 4-mod; |
| + // round out long |
| + for(int i=0;i<pad;i++) { |
| + in.readShort(); |
| + } |
| + } |
| + |
| + this.blocks = blocks; |
| + } |
| + |
| + /** |
| + * Creates an array backed by the given blocks. |
| + * </p><p> |
| + * Note: The blocks are used directly, so changes to the given block will |
| + * affect the structure. |
| + * @param blocks used as the internal backing array. |
| + */ |
| + public Direct16(short[] blocks) { |
| + super(blocks.length, BITS_PER_VALUE); |
| + this.blocks = blocks; |
| + } |
| + |
| + public long get(final int index) { |
| + return 0xFFFFL & blocks[index]; |
| + } |
| + |
| + public void set(final int index, final long value) { |
| + blocks[index] = (short)(value & 0xFFFF); |
| + } |
| + |
| + public long ramBytesUsed() { |
| + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + |
| + blocks.length * RamUsageEstimator.NUM_BYTES_SHORT; |
| + } |
| + |
| + public void clear() { |
| + Arrays.fill(blocks, (short)0); |
| + } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/packed/PackedWriter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/PackedWriter.java Tue Feb 23 15:42:13 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/PackedWriter.java Tue Feb 23 15:42:13 CET 2010 |
| @@ -0,0 +1,116 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.store.IndexOutput; |
| + |
| +import java.io.IOException; |
| + |
| +// Packs high order byte first, to match |
| +// IndexOutput.writeInt/Long/Short byte order |
| + |
| +/** |
| + * Generic writer for space-optimal packed values. The resulting bits can be |
| + * used directly by Packed32, Packed64 and PackedDirect* and will always be |
| + * long-aligned. |
| + */ |
| +class PackedWriter extends PackedInts.Writer { |
| +  // The long being assembled; values are filled in from the top bits down |
| +  private long pending; |
| +  // Number of low bits in pending that are still free, 64 for an empty long |
| +  private int pendingBitPos; |
| + |
| +  // masks[n-1] masks for bottom n bits |
| +  private final long[] masks; |
| +  // Number of values received by add so far |
| +  private int written = 0; |
| + |
| +  // nocommit -- allow minValue too?  ie not just minValue==0 |
| + |
| +  /** |
| +   * @param out          the destination, passed on to the super class. |
| +   * @param valueCount   the number of values that will be written. |
| +   * @param bitsPerValue the number of bits used for each value. |
| +   * @throws IOException passed on from the super class constructor. |
| +   */ |
| +  public PackedWriter(IndexOutput out, int valueCount, int bitsPerValue) |
| +                                                            throws IOException { |
| + |
| +    super(out, valueCount, bitsPerValue, PackedInts.PERSISTENCE.packed); |
| + |
| +    pendingBitPos = 64; |
| +    masks = new long[bitsPerValue - 1]; |
| + |
| +    // The masks must be computed as longs: with int arithmetic every mask |
| +    // for 32 bits or more overflows and ends up as all ones. |
| +    // The largest shift is bitsPerValue - 1, so 1L << n cannot overflow. |
| +    for (int i = 0; i < masks.length; i++) { |
| +      masks[i] = (1L << (i + 1)) - 1; |
| +    } |
| +  } |
| + |
| +  /** |
| +   * Appends a value, writing a long to the output each time one is filled. |
| +   * Do not call this after finish |
| +   * @param v the value to add, from 0 to the maximum for bitsPerValue. |
| +   * @throws IOException if a filled long could not be written. |
| +   */ |
| +  @Override |
| +  public void add(long v) throws IOException { |
| +    assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v |
| +            + " maxValue=" + PackedInts.maxValue(bitsPerValue); |
| +    assert v >= 0; |
| + |
| +    // TODO |
| +    if (pendingBitPos >= bitsPerValue) { |
| +      // not split |
| + |
| +      // write-once, so we can |= w/o first masking to 0s |
| +      pending |= v << (pendingBitPos - bitsPerValue); |
| +      if (pendingBitPos == bitsPerValue) { |
| +        // flush |
| +        out.writeLong(pending); |
| +        pending = 0; |
| +        pendingBitPos = 64; |
| +      } else { |
| +        pendingBitPos -= bitsPerValue; |
| +      } |
| + |
| +    } else { |
| +      // split |
| + |
| +      // write top pendingBitPos bits of value into bottom bits of pending |
| +      pending |= (v >> (bitsPerValue - pendingBitPos)) & masks[pendingBitPos - 1]; |
| + |
| +      // flush |
| +      out.writeLong(pending); |
| + |
| +      // write bottom (bitsPerValue - pendingBitPos) bits of value into top bits of pending |
| +      pendingBitPos = 64 - bitsPerValue + pendingBitPos; |
| +      pending = (v << pendingBitPos); |
| +    } |
| +    written++; |
| +  } |
| + |
| +  /** |
| +   * Pads with zeroes up to valueCount values, writes any partially filled |
| +   * long and appends one extra long. |
| +   * @throws IOException if the output could not be written. |
| +   */ |
| +  @Override |
| +  public void finish() throws IOException { |
| +    while (written < valueCount) { |
| +      add(0L); // Auto flush |
| +    } |
| + |
| +    if (pendingBitPos != 64) { |
| +      out.writeLong(pending); |
| +    } |
| +    out.writeLong(0L); // Dummy to compensate for not using conditionals |
| +  } |
| + |
| +  @Override |
| +  public String toString() { |
| +    return "PackedWriter(written " + written + "/" + valueCount + " with " |
| +            + bitsPerValue + " bits/value)"; |
| +  } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/packed/Direct32.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/Direct32.java Mon Feb 22 08:42:35 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/Direct32.java Mon Feb 22 08:42:35 CET 2010 |
| @@ -0,0 +1,82 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Direct wrapping of 32 bit values to a backing array of ints. |
| + */ |
| +class Direct32 extends PackedInts.ReaderImpl |
| +        implements PackedInts.Mutable { |
| +  private int[] blocks; |
| +  private static final int BITS_PER_VALUE = 32; |
| + |
| +  /** |
| +   * Creates an array of the given size with all values initialized to 0. |
| +   * @param valueCount the number of values. |
| +   */ |
| +  public Direct32(int valueCount) { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    blocks = new int[valueCount]; |
| +  } |
| + |
| +  /** |
| +   * Creates an array with content retrieved from the given IndexInput. |
| +   * For an odd number of values one extra int is consumed, so that a whole |
| +   * number of longs is read. |
| +   * @param in         the source of the values. |
| +   * @param valueCount the number of values to read. |
| +   * @throws IOException if the values could not be read. |
| +   */ |
| +  public Direct32(IndexInput in, int valueCount) throws IOException { |
| +    super(valueCount, BITS_PER_VALUE); |
| +    int[] blocks = new int[valueCount]; |
| +    for(int i=0;i<valueCount;i++) { |
| +      blocks[i] = in.readInt(); |
| +    } |
| +    final int mod = valueCount % 2; |
| +    if (mod != 0) { |
| +      in.readInt(); |
| +    } |
| + |
| +    this.blocks = blocks; |
| +  } |
| + |
| +  /** |
| +   * Creates an array backed by the given blocks. |
| +   * </p><p> |
| +   * Note: The blocks are used directly, so changes to the given block will |
| +   * affect the structure. |
| +   * @param blocks   used as the internal backing array. |
| +   */ |
| +  public Direct32(int[] blocks) { |
| +    super(blocks.length, BITS_PER_VALUE); |
| +    this.blocks = blocks; |
| +  } |
| + |
| +  /** |
| +   * @param index the position of the value. |
| +   * @return the value at the given index, as an unsigned 32 bit number. |
| +   */ |
| +  public long get(final int index) { |
| +    return 0xFFFFFFFFL & blocks[index]; |
| +  } |
| + |
| +  /** |
| +   * Stores the lowest 32 bits of the value at the given index. |
| +   * @param index the position of the value. |
| +   * @param value the value to store. |
| +   */ |
| +  public void set(final int index, final long value) { |
| +    // The mask needs the L suffix: 0xFFFFFFFF is the int -1, which widens |
| +    // to an all-ones long and would not mask anything |
| +    blocks[index] = (int)(value & 0xFFFFFFFFL); |
| +  } |
| + |
| +  /** |
| +   * @return the array header size plus the size of the backing int array. |
| +   */ |
| +  public long ramBytesUsed() { |
| +    // Multiply as long so the byte count cannot overflow for very large arrays |
| +    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + |
| +            (long) blocks.length * RamUsageEstimator.NUM_BYTES_INT; |
| +  } |
| + |
| +  /** |
| +   * Sets all values to 0. |
| +   */ |
| +  public void clear() { |
| +    Arrays.fill(blocks, 0); |
| +  } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/BytesRef.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/BytesRef.java Fri Jan 22 12:58:35 CET 2010 |
| +++ src/java/org/apache/lucene/util/BytesRef.java Fri Jan 22 12:58:35 CET 2010 |
| @@ -0,0 +1,170 @@ |
| +package org.apache.lucene.util; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.UnsupportedEncodingException; |
| + |
| +// nocommit -- share w/ flex's TermRef |
| +public class BytesRef { |
| + |
| + public byte[] bytes; |
| + public int offset; |
| + public int length; |
| + |
| + public abstract static class Comparator { |
| + abstract public int compare(BytesRef a, BytesRef b); |
| + } |
| + |
| + public BytesRef() { |
| + } |
| + |
| + /** Creates bytes ref, wrapping UTF8 bytes from the |
| + * provided string. */ |
| + public BytesRef(String s) { |
| + try { |
| + bytes = s.getBytes("UTF-8"); |
| + } catch (UnsupportedEncodingException uee) { |
| + throw new RuntimeException(uee); |
| + } |
| + offset = 0; |
| + length = bytes.length; |
| + } |
| + |
| + public BytesRef(BytesRef other) { |
| + offset = 0; |
| + length = other.length; |
| + bytes = new byte[other.length]; |
| + System.arraycopy(other.bytes, other.offset, bytes, 0, length); |
| + } |
| + |
| + public boolean bytesEquals(BytesRef other) { |
| + if (length == other.length) { |
| + int upto = offset; |
| + int otherUpto = other.offset; |
| + final byte[] otherBytes = other.bytes; |
| + for(int i=0;i<length;i++) { |
| + if (bytes[upto++] != otherBytes[otherUpto++]) { |
| + return false; |
| + } |
| + } |
| + return true; |
| + } else { |
| + return false; |
| + } |
| + } |
| + |
| + public String utf8ToString() { |
| + try { |
| + return new String(bytes, offset, length, "UTF8"); |
| + } catch (java.io.UnsupportedEncodingException uee) { |
| + throw new RuntimeException(uee); |
| + } |
| + } |
| + |
| + private final static Comparator straightComparator = new StraightComparator(); |
| + |
| + public static Comparator getStraightComparator() { |
| + return straightComparator; |
| + } |
| + |
| + public static class StraightComparator extends Comparator { |
| + public int compare(BytesRef a, BytesRef b) { |
| + int aUpto = a.offset; |
| + int bUpto = b.offset; |
| + final int aStop; |
| + if (a.length <= b.length) { |
| + aStop = aUpto + a.length; |
| + } else { |
| + aStop = aUpto + b.length; |
| + } |
| + while(aUpto < aStop) { |
| + final int cmp = a.bytes[aUpto++] - b.bytes[bUpto++]; |
| + if (cmp != 0) { |
| + return cmp; |
| + } |
| + } |
| + return a.length - b.length; |
| + } |
| + } |
| + |
| + private final static Comparator utf8SortedAsUTF16SortOrder = new UTF8SortedAsUTF16Comparator(); |
| + |
| + public static Comparator getUTF8SortedAsUTF16Comparator() { |
| + return utf8SortedAsUTF16SortOrder; |
| + } |
| + |
| + public static class UTF8SortedAsUTF16Comparator extends Comparator { |
| + // Compares two UTF-8 encoded refs so that the result follows the order the |
| + // same text would have when compared as UTF-16 code units. |
| + public int compare(BytesRef a, BytesRef b) { |
| + final byte[] left = a.bytes; |
| + final int leftStart = a.offset; |
| + final byte[] right = b.bytes; |
| + final int rightStart = b.offset; |
| + |
| + // Only the prefix that both refs share is compared byte by byte. |
| + final int shared = Math.min(a.length, b.length); |
| + |
| + for (int i = 0; i < shared; i++) { |
| + int x = left[leftStart + i] & 0xff; |
| + int y = right[rightStart + i] & 0xff; |
| + if (x == y) { |
| + continue; |
| + } |
| + |
| + // See http://icu-project.org/docs/papers/utf16_code_point_order.html#utf-8-in-utf-16-order |
| + // We know the terms are not equal, but, we may have to fix up the |
| + // bytes at the difference to match UTF16's sort order. The fixup is |
| + // applied only when both bytes are >= 0xee: |
| + if (x >= 0xee && y >= 0xee) { |
| + x = fixup(x); |
| + y = fixup(y); |
| + } |
| + return x - y; |
| + } |
| + |
| + // One is a prefix of the other, or, they are equal: |
| + return a.length - b.length; |
| + } |
| + |
| + // Moves the values 0xee and 0xef up by 0x10; any other value is returned |
| + // unchanged. The argument is an unsigned byte value (0..255). |
| + private static int fixup(int unsignedByte) { |
| + return (unsignedByte & 0xfe) == 0xee ? unsignedByte + 0x10 : unsignedByte; |
| + } |
| + } |
| + |
| + // nocommit -- kinda hackish? needed only (so far) for FieldComparator |
| + private static class ComparableBytesRef implements Comparable { |
| + private final BytesRef ref; // the wrapped bytes |
| + private final Comparator order; // ordering applied by compareTo |
| + public ComparableBytesRef(BytesRef b, Comparator c) { |
| + ref = b; |
| + order = c; |
| + } |
| + // Casts other to ComparableBytesRef and compares the two wrapped refs with this instance's comparator. |
| + public int compareTo(Object other) { |
| + final BytesRef theirs = ((ComparableBytesRef) other).ref; |
| + return order.compare(ref, theirs); |
| + } |
| + } |
| + /** Wraps b so that it can be ordered through {@link Comparable} using c. */ |
| + public static Comparable getComparableBytesRef(BytesRef b, Comparator c) { |
| + return new ComparableBytesRef(b, c); |
| + } |
| +} |
| Index: src/java/org/apache/lucene/util/packed/Direct64.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/Direct64.java Mon Feb 22 08:42:35 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/Direct64.java Mon Feb 22 08:42:35 CET 2010 |
| @@ -0,0 +1,79 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Direct wrapping of 32 bit values to a backing array of ints. |
| + */ |
| +class Direct64 extends PackedInts.ReaderImpl |
| + implements PackedInts.Mutable { |
| + private long[] blocks; |
| + private static final int BITS_PER_VALUE = 64; |
| + |
| + public Direct64(int valueCount) { |
| + super(valueCount, BITS_PER_VALUE); |
| + blocks = new long[valueCount]; |
| + } |
| + |
| + public Direct64(IndexInput in, int valueCount) throws IOException { |
| + super(valueCount, BITS_PER_VALUE); |
| + long[] blocks = new long[valueCount]; |
| + for(int i=0;i<valueCount;i++) { |
| + blocks[i] = in.readLong(); |
| + } |
| + |
| + this.blocks = blocks; |
| + } |
| + |
| + |
| + /** |
| + * Creates an array backed by the given blocks. |
| + * </p><p> |
| + * Note: The blocks are used directly, so changes to the given block will |
| + * affect the structure. |
| + * @param blocks used as the internal backing array. |
| + */ |
| + public Direct64(long[] blocks) { |
| + super(blocks.length, BITS_PER_VALUE); |
| + this.blocks = blocks; |
| + } |
| + |
| + public long get(final int index) { |
| + return blocks[index]; |
| + } |
| + |
| + public void set(final int index, final long value) { |
| + blocks[index] = value; |
| + } |
| + |
| + public long ramBytesUsed() { |
| + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + |
| + blocks.length * RamUsageEstimator.NUM_BYTES_LONG; |
| + } |
| + |
| + public void clear() { |
| + Arrays.fill(blocks, 0L); |
| + } |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/packed/Packed64.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/Packed64.java Fri Feb 26 13:29:32 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/Packed64.java Fri Feb 26 13:29:32 CET 2010 |
| @@ -0,0 +1,210 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Space optimized random access capable array of values with a fixed number of |
| + * bits. For 32 bits/value and less, performance on 32 bit machines is not |
| + * optimal. Consider using {@link Packed32} for such a setup. |
| + * </p><p> |
| + * The implementation strives to avoid conditionals and expensive operations, |
| + * sacrificing code clarity to achieve better performance. |
| + */ |
| +class Packed64 extends PackedInts.ReaderImpl implements PackedInts.Mutable { |
| + static final int BLOCK_SIZE = 64; // 32 = int, 64 = long |
| + static final int BLOCK_BITS = 6; // The #bits representing BLOCK_SIZE |
| + static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE |
| + |
| + private static final int ENTRY_SIZE = BLOCK_SIZE + 1; |
| + private static final int FAC_BITPOS = 3; |
| + |
| + /* |
| + * In order to make an efficient value-getter, conditionals should be |
| + * avoided. A value can be positioned inside of a block, requiring shifting |
| + * left or right or it can span two blocks, requiring a left-shift on the |
| + * first block and a right-shift on the right block. |
| + * </p><p> |
| + * By always shifting the first block both left and right, we get exactly |
| + * the right bits. By always shifting the second block right and applying |
| + * a mask, we get the right bits there. After that, we | the two bitsets. |
| + */ |
| + private static final int[][] SHIFTS = |
| + new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; |
| + //new int[BLOCK_SIZE+1][BLOCK_SIZE][BLOCK_SIZE+1]; |
| + private static final long[][] MASKS = new long[ENTRY_SIZE][ENTRY_SIZE]; |
| + |
| + static { // Generate shifts |
| + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| + int[] currentShifts = SHIFTS[elementBits]; |
| + int base = bitPos * FAC_BITPOS; |
| + currentShifts[base ] = bitPos; |
| + currentShifts[base + 1] = BLOCK_SIZE - elementBits; |
| + if (bitPos <= BLOCK_SIZE - elementBits) { // Single block |
| + currentShifts[base + 2] = 0; |
| + MASKS[elementBits][bitPos] = 0; |
| + } else { // Two blocks |
| + int rBits = elementBits - (BLOCK_SIZE - bitPos); |
| + currentShifts[base + 2] = BLOCK_SIZE - rBits; |
| + MASKS[elementBits][bitPos] = ~(~0L << rBits); |
| + } |
| + } |
| + } |
| + } |
| + |
| + /* |
| + * The setter requires more masking than the getter. |
| + */ |
| + private static final long[][] WRITE_MASKS = |
| + new long[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; |
| + static { |
| + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { |
| + long elementPosMask = ~(~0L << elementBits); |
| + int[] currentShifts = SHIFTS[elementBits]; |
| + long[] currentMasks = WRITE_MASKS[elementBits]; |
| + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { |
| + int base = bitPos * FAC_BITPOS; |
| + currentMasks[base ] =~((elementPosMask |
| + << currentShifts[base + 1]) |
| + >>> currentShifts[base]); |
| + currentMasks[base+1] = ~(elementPosMask |
| + << currentShifts[base + 2]); |
| + currentMasks[base+2] = currentShifts[base + 2] == 0 ? 0 : ~0; |
| + } |
| + } |
| + } |
| + |
| + /* The bits */ |
| + private long[] blocks; |
| + |
| + // Cached calculations |
| + private int maxPos; // blocks.length * BLOCK_SIZE / elementBits - 1 |
| + private int[] shifts; // The shifts for the current elementBits |
| + private long[] readMasks; |
| + private long[] writeMasks; |
| + |
| + /** |
| + * Creates an array with the internal structures adjusted for the given |
| + * limits and initialized to 0. |
| + * @param valueCount the number of elements. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + */ |
| + public Packed64(int valueCount, int bitsPerValue) { |
| + // TODO: Test for edge-cases (2^31 values, 63 bitsPerValue) |
| + // +2 due to the avoid-conditionals-trick. The last entry is always 0 |
| + this(new long[(int)((long)valueCount * bitsPerValue / BLOCK_SIZE + 2)], |
| + valueCount, bitsPerValue); |
| + } |
| + |
| + |
| + /** |
| + * Creates an array backed by the given blocks. |
| + * </p><p> |
| + * Note: The blocks are used directly, so changes to the given block will |
| + * affect the Packed32-structure. |
| + * @param blocks used as the internal backing array. Not that the last |
| + * element cannot be addressed directly. |
| + * @param valueCount the number of values. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + */ |
| + public Packed64(long[] blocks, int valueCount, int bitsPerValue) { |
| + super(valueCount, bitsPerValue); |
| + this.blocks = blocks; |
| + updateCached(); |
| + } |
| + |
| + /** |
| + * Creates an array with content retrieved from the given IndexInput. |
| + * @param in an IndexInput, positioned at the start of Packed64-content. |
| + * @param valueCount the number of elements. |
| + * @param bitsPerValue the number of bits available for any given value. |
| + * @throws java.io.IOException if the values for the backing array could not |
| + * be retrieved. |
| + */ |
| + public Packed64(IndexInput in, int valueCount, int bitsPerValue) |
| + throws IOException { |
| + super(valueCount, bitsPerValue); |
| + int size = size(valueCount, bitsPerValue); |
| + blocks = new long[size+1]; // +1 due to non-conditional tricks |
| + for(int i=0;i<size;i++) { |
| + blocks[i] = in.readLong(); |
| + } |
| + updateCached(); |
| + } |
| + |
| + private static int size(int valueCount, int bitsPerValue) { |
| + final long totBitCount = (long) valueCount * bitsPerValue; |
| + return (int)(totBitCount/64 + ((totBitCount % 64 == 0 ) ? 0:1)); |
| + } |
| + |
| + private void updateCached() { |
| + readMasks = MASKS[bitsPerValue]; |
| + shifts = SHIFTS[bitsPerValue]; |
| + writeMasks = WRITE_MASKS[bitsPerValue]; |
| + maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2); |
| + } |
| + |
| + /** |
| + * @param index the position of the value. |
| + * @return the value at the given index. |
| + */ |
| + public long get(final int index) { |
| + final long majorBitPos = index * bitsPerValue; |
| + final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE |
| + final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); |
| + |
| + final int base = bitPos * FAC_BITPOS; |
| + |
| + return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) | |
| + ((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]); |
| + } |
| + |
| + public void set(final int index, final long value) { |
| + final long majorBitPos = index * bitsPerValue; |
| + final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE |
| + final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); |
| + final int base = bitPos * FAC_BITPOS; |
| + |
| + blocks[elementPos ] = (blocks[elementPos ] & writeMasks[base]) |
| + | (value << shifts[base + 1] >>> shifts[base]); |
| + blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1]) |
| + | ((value << shifts[base + 2]) & writeMasks[base+2]); |
| + } |
| + |
| + public String toString() { |
| + return "Packed64(bitsPerValue=" + bitsPerValue + ", size=" |
| + + size() + ", maxPos=" + maxPos |
| + + ", elements.length=" + blocks.length + ")"; |
| + } |
| + |
| + public long ramBytesUsed() { |
| + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER |
| + + blocks.length * RamUsageEstimator.NUM_BYTES_LONG; |
| + } |
| + |
| + public void clear() { |
| + Arrays.fill(blocks, 0L); |
| + } |
| + |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/util/packed/package.html |
| =================================================================== |
| --- src/java/org/apache/lucene/util/packed/package.html Mon Feb 22 08:23:22 CET 2010 |
| +++ src/java/org/apache/lucene/util/packed/package.html Mon Feb 22 08:23:22 CET 2010 |
| @@ -0,0 +1,16 @@ |
| +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> |
| +<html> |
| +<head></head> |
| +<body bgcolor="white"> |
| + |
| +<p> |
| + The packed package provides random access capable arrays of non-negative longs. |
| + The implementations provide different trade-offs between memory usage and |
| + access speed. The standard usage scenario is replacing large int or long |
| + arrays in order to reduce the memory footprint. |
| +</p><p> |
| + The main access point is the {@link PackedInts} factory. |
| +</p> |
| + |
| +</body> |
| +</html> |
| \ No newline at end of file |
| Index: src/test/org/apache/lucene/util/packed/TestPackedInts.java |
| =================================================================== |
| --- src/test/org/apache/lucene/util/packed/TestPackedInts.java Fri Feb 26 13:36:34 CET 2010 |
| +++ src/test/org/apache/lucene/util/packed/TestPackedInts.java Fri Feb 26 13:36:34 CET 2010 |
| @@ -0,0 +1,357 @@ |
| +package org.apache.lucene.util.packed; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.store.*; |
| +import org.apache.lucene.util.LuceneTestCase; |
| + |
| +import java.io.StringWriter; |
| +import java.util.ArrayList; |
| +import java.util.List; |
| +import java.util.Random; |
| +import java.io.IOException; |
| + |
| + public class TestPackedInts extends LuceneTestCase { |
| + // Round-trip and cross-implementation consistency tests for the packed ints classes. |
| + /* public void testBitsRequired() throws Exception { |
| + assertEquals(61, PackedInts.bitsRequired((long)Math.pow(2, 61)-1)); |
| + assertEquals(61, PackedInts.bitsRequired(0x1FFFFFFFFFFFFFFFL)); |
| + assertEquals(62, PackedInts.bitsRequired(0x3FFFFFFFFFFFFFFFL)); |
| + assertEquals(63, PackedInts.bitsRequired(0x7FFFFFFFFFFFFFFFL)); |
| + } */ |
| + |
| + public void testMaxValues() throws Exception { |
| + assertEquals("1 bit -> max == 1", |
| + 1, PackedInts.maxValue(1)); |
| + assertEquals("2 bit -> max == 3", |
| + 3, PackedInts.maxValue(2)); |
| + assertEquals("8 bit -> max == 255", |
| + 255, PackedInts.maxValue(8)); |
| + assertEquals("63 bit -> max == Long.MAX_VALUE", |
| + Long.MAX_VALUE, PackedInts.maxValue(63)); |
| + assertEquals("64 bit -> max == Long.MAX_VALUE (same as for 63 bit)", |
| + Long.MAX_VALUE, PackedInts.maxValue(64)); // was maxValue(63), which left the 64 bit case unchecked |
| + } |
| + |
| + public void testPackedInts() throws IOException { |
| + Random rand = newRandom(); |
| + for(int iter=0;iter<50;iter++) { |
| + long ceil = 2; |
| + // nocommit -- need to get the 64 bit case working |
| + for(int nbits=1;nbits<63;nbits++) { |
| + final int valueCount = 100+rand.nextInt(500); |
| + final Directory d = new MockRAMDirectory(); |
| + |
| + IndexOutput out = d.createOutput("out.bin"); |
| + PackedInts.Writer w = PackedInts.getWriter( |
| + out, valueCount, nbits, PackedInts.STORAGE.packed); |
| + |
| + final long[] values = new long[valueCount]; |
| + for(int i=0;i<valueCount;i++) { |
| + long v = rand.nextLong() % ceil; |
| + if (v < 0) { |
| + v = -v; |
| + } |
| + values[i] = v; |
| + w.add(values[i]); |
| + } |
| + w.finish(); |
| + out.close(); |
| + |
| + IndexInput in = d.openInput("out.bin"); |
| + PackedInts.Reader r = PackedInts.getReader(in); |
| + for(int i=0;i<valueCount;i++) { |
| + assertEquals("index=" + i + " ceil=" + ceil + " valueCount=" |
| + + valueCount + " nbits=" + nbits + " for " |
| + + r.getClass().getSimpleName(), values[i], r.get(i)); |
| + } |
| + in.close(); |
| + ceil *= 2; |
| + } |
| + } |
| + } |
| + |
| + public void testAligned64Writer() throws IOException { |
| + final Directory d = new MockRAMDirectory(); |
| + |
| + long[] INPUT = new long[]{1, 0, 1}; |
| + IndexOutput out = d.createOutput("out.bin"); |
| + PackedInts.Writer w = PackedInts.getWriter( |
| + out, INPUT.length, 1, PackedInts.STORAGE.aligned64); |
| + for (long input: INPUT) { |
| + w.add(input); |
| + } |
| + w.finish(); |
| + out.close(); |
| + |
| + IndexInput in = d.openInput("out.bin"); |
| + PackedInts.Reader r = PackedInts.getReader(in); |
| + assertEquals("The first stored bit should be retrievable", 1, r.get(0)); |
| + assertEquals("The second stored bit should be retrievable", 0, r.get(1)); |
| + in.close(); |
| + } |
| + |
| + public void testControlledEquality() { |
| + final int VALUE_COUNT = 255; |
| + final int BITS_PER_VALUE = 8; |
| + |
| + List<PackedInts.Mutable> packedInts = |
| + createPackedInts(VALUE_COUNT, BITS_PER_VALUE); |
| + for (PackedInts.Mutable packedInt: packedInts) { |
| + for (int i = 0 ; i < packedInt.size() ; i++) { |
| + packedInt.set(i, i+1); |
| + } |
| + } |
| + assertListEquality(packedInts); |
| + } |
| + |
| + public void testRandomEquality() { |
| + final int[] VALUE_COUNTS = new int[]{0, 1, 5, 8, 100, 500}; |
| + final int MIN_BITS_PER_VALUE = 1; |
| + final int MAX_BITS_PER_VALUE = 64; |
| + final int RANDOM_SEED = 87; |
| + |
| + for (int valueCount: VALUE_COUNTS) { |
| + for (int bitsPerValue = MIN_BITS_PER_VALUE ; |
| + bitsPerValue <= MAX_BITS_PER_VALUE ; |
| + bitsPerValue++) { |
| + assertRandomEquality(valueCount, bitsPerValue, RANDOM_SEED); |
| + } |
| + } |
| + } |
| + |
| + public void testAligned64Fill() throws IOException { |
| + testAlignedFill(1, 2, 3, |
| + PackedInts.STORAGE.aligned64, PackedInts.BLOCK.bit64); |
| + } |
| + |
| + public void testAlignedFill( |
| + int valueCount, int bitsPerValue, int value, |
| + PackedInts.STORAGE storage, PackedInts.BLOCK block) |
| + throws IOException { |
| + // long value = PackedInts.maxValue(bitsPerValue); |
| + Aligned64 aligned64 = new Aligned64(valueCount, bitsPerValue); |
| + for (int i = 0 ; i < valueCount ;i++) { |
| + aligned64.set(i, value); |
| + } |
| + for (long backing: aligned64.getBackingArray()) { |
| + System.out.println("Direct : " + Long.toBinaryString(backing) + " (" + backing + ")"); |
| + } |
| + /* assertEquals("Backing long 0 should be fully marked", |
| + ~0L, aligned64.getBackingArray()[0]); |
| + assertEquals("Backing long 1 should be fully marked", |
| + ~0L, aligned64.getBackingArray()[1]); |
| + */ |
| + final Directory d = new MockRAMDirectory(); |
| + IndexOutput out = d.createOutput("out.bin"); |
| + PackedInts.Writer w = PackedInts.getWriter( |
| + out, valueCount, bitsPerValue, storage, block); |
| + |
| + for (int i = 0 ; i < valueCount ; i++) { |
| + w.add(value); |
| + } |
| + w.finish(); |
| + out.close(); |
| + |
| + IndexInput in = d.openInput("out.bin"); |
| + PackedInts.Reader reader = PackedInts.getReader(in); |
| + |
| + for (int i = 0 ; i < valueCount ; i++) { |
| + assertEquals(String.format( |
| + "%s at %s with value count=%d and bits/value=%d at position %d", |
| + storage, block, valueCount, bitsPerValue, i), |
| + value, reader.get(i)); |
| + } |
| + in.close(); // release the input once all values are verified, as the other tests do |
| + } |
| + |
| + public void testRandomPersistenceEquality() { |
| + final int[] VALUE_COUNTS = new int[]{0, 1, 5, 8, 100, 500}; |
| + final int MIN_BITS_PER_VALUE = 1; |
| + final int MAX_BITS_PER_VALUE = 63; |
| + final int RANDOM_SEED = 87; |
| + |
| + for (int valueCount: VALUE_COUNTS) { |
| + for (int bitsPerValue = MIN_BITS_PER_VALUE ; |
| + bitsPerValue <= MAX_BITS_PER_VALUE ; |
| + bitsPerValue++) { |
| + assertRandomPersistenceEquality(valueCount, bitsPerValue, RANDOM_SEED); |
| + } |
| + } |
| + } |
| + |
| + public void testFactory() throws Exception { |
| + assertEquals(PackedInts.IMPLEMENTATION.aligned64, |
| + PackedInts.getImplementation( |
| + 1, PackedInts.STORAGE.aligned64, PackedInts.BLOCK.bit64)); |
| + } |
| + |
| + public void testControlledPersistenceEquality() throws IOException { |
| + final int RANDOM_SEED = 87; |
| + |
| + // assertRandomPersistenceEquality(1, 1, RANDOM_SEED); |
| + // assertRandomPersistenceEquality(98, 1, RANDOM_SEED); |
| + |
| + PackedInts.Reader aligned64 = writeAndRead( |
| + 1, 2, PackedInts.STORAGE.aligned64, PackedInts.BLOCK.bit32, |
| + RANDOM_SEED); |
| + PackedInts.Reader packed = writeAndRead( |
| + 1, 2, PackedInts.STORAGE.packed, PackedInts.BLOCK.bit32, |
| + RANDOM_SEED); |
| + assertEquals("The values at position 0 should match", |
| + aligned64.get(0), packed.get(0)); |
| + |
| + //assertRandomPersistenceEquality(99, 1, RANDOM_SEED); |
| + //assertRandomPersistenceEquality(1, 2, RANDOM_SEED); |
| + } |
| + |
| + /* ************************************************************************ */ |
| + |
| + /* ************************************************************************ */ |
| + |
| + private void assertRandomEquality( |
| + int valueCount, int bitsPerValue, int randomSeed) { |
| + List<PackedInts.Mutable> packedInts = |
| + createPackedInts(valueCount, bitsPerValue); |
| + for (PackedInts.Mutable packedInt: packedInts) { |
| + try { |
| + fill(packedInt, (long)(Math.pow(2, bitsPerValue)-1), randomSeed); |
| + } catch (Exception e) { |
| + e.printStackTrace(System.err); |
| + fail(String.format( |
| + "Exception while filling %s: valueCount=%d, bitsPerValue=%s", |
| + packedInt.getClass().getSimpleName(), |
| + valueCount, bitsPerValue)); |
| + } |
| + } |
| + assertListEquality(packedInts); |
| + } |
| + // Writes and re-reads the same seeded values with every STORAGE and both BLOCK sizes, then compares all readers. |
| + private void assertRandomPersistenceEquality( |
| + int valueCount, int bitsPerValue, int randomSeed) { |
| + List<PackedInts.Reader> packedInts = new ArrayList<PackedInts.Reader>(); |
| + for (PackedInts.STORAGE storage: PackedInts.STORAGE.values()) { |
| + try { |
| + packedInts.add(writeAndRead( |
| + valueCount, bitsPerValue, storage, PackedInts.BLOCK.bit32, |
| + randomSeed)); |
| + packedInts.add(writeAndRead( |
| + valueCount, bitsPerValue, storage, PackedInts.BLOCK.bit64, |
| + randomSeed)); |
| + } catch (Exception e) { |
| + e.printStackTrace(System.err); |
| + fail(String.format( |
| + "Exception while filling %s: valueCount=%d, bitsPerValue=%s", |
| + storage, valueCount, bitsPerValue)); |
| + } |
| + } |
| + assertListEquality("valueCount=" + valueCount +", bitsPerValue=" |
| + + bitsPerValue, packedInts); |
| + } |
| + // Writes valueCount seeded random values to a fresh MockRAMDirectory and returns a reader opened on the result. |
| + private PackedInts.Reader writeAndRead( |
| + int valueCount, int bitsPerValue, |
| + PackedInts.STORAGE storage, PackedInts.BLOCK block, int randomSeed) |
| + throws IOException { |
| + long randMax = bitsPerValue >= 63 ? |
| + Long.MAX_VALUE : PackedInts.maxValue(bitsPerValue)+1; |
| + Random random = new Random(randomSeed); |
| + |
| + final Directory d = new MockRAMDirectory(); |
| + IndexOutput out = d.createOutput("out.bin"); |
| + PackedInts.Writer w = PackedInts.getWriter( |
| + out, valueCount, bitsPerValue, storage, block); |
| + |
| + // System.out.println("Writer: " + w); |
| + |
| + for (int i = 0 ; i < valueCount ; i++) { |
| + w.add(Math.abs(random.nextLong() % randMax)); |
| + } |
| + w.finish(); |
| + out.close(); |
| + |
| + IndexInput in = d.openInput("out.bin"); |
| + PackedInts.Reader reader = PackedInts.getReader(in); |
| + // System.out.println("Reader: " + reader); |
| + return reader; |
| + } |
| + // Builds one instance of every Mutable implementation that this test pairs with the given bitsPerValue. |
| + private List<PackedInts.Mutable> createPackedInts( |
| + int valueCount, int bitsPerValue) { |
| + List<PackedInts.Mutable> packedInts = new ArrayList<PackedInts.Mutable>(); |
| + if (bitsPerValue <= 8) { |
| + packedInts.add(new Direct8(valueCount)); |
| + } |
| + if (bitsPerValue <= 16) { |
| + packedInts.add(new Direct16(valueCount)); |
| + } |
| + if (bitsPerValue <= 31) { |
| + packedInts.add(new Packed32(valueCount, bitsPerValue)); |
| + packedInts.add(new Aligned32(valueCount, bitsPerValue)); |
| + } |
| + if (bitsPerValue <= 32) { |
| + packedInts.add(new Direct32(valueCount)); |
| + } |
| + if (bitsPerValue <= 63) { |
| + packedInts.add(new Packed64(valueCount, bitsPerValue)); |
| + packedInts.add(new Aligned64(valueCount, bitsPerValue)); |
| + } |
| + packedInts.add(new Direct64(valueCount)); |
| + return packedInts; |
| + } |
| + // Sets every position to a seeded random value and checks that an immediate get returns it. |
| + private void fill( |
| + PackedInts.Mutable packedInt, long maxValue, int randomSeed) { |
| + maxValue++; |
| + Random random = new Random(randomSeed); |
| + for (int i = 0 ; i < packedInt.size() ; i++) { |
| + long value = Math.abs(random.nextLong() % maxValue); |
| + packedInt.set(i, value); |
| + assertEquals(String.format( |
| + "The set/get of the value at index %d should match for %s", |
| + i, packedInt.getClass().getSimpleName()), |
| + value, packedInt.get(i)); |
| + } |
| + } |
| + // Asserts that all readers have the same size and hold the same value at every index as the first reader. |
| + private void assertListEquality( |
| + List<? extends PackedInts.Reader> packedInts) { |
| + assertListEquality("", packedInts); |
| + } |
| + private void assertListEquality( |
| + String message, List<? extends PackedInts.Reader> packedInts) { |
| + if (packedInts.size() == 0) { |
| + return; |
| + } |
| + PackedInts.Reader base = packedInts.get(0); |
| + int valueCount = base.size(); |
| + for (PackedInts.Reader packedInt: packedInts) { |
| + assertEquals(message + ". The number of values should be the same ", |
| + valueCount, packedInt.size()); |
| + } |
| + for (int i = 0 ; i < valueCount ; i++) { |
| + for (int j = 1 ; j < packedInts.size() ; j++) { |
| + assertEquals(String.format( |
| + "%s. The value at index %d should be the same for %s and %s", |
| + message, i, base.getClass().getSimpleName(), |
| + packedInts.get(j).getClass().getSimpleName()), |
| + base.get(i), packedInts.get(j).get(i)); |
| + } |
| + } |
| + } |
| + } |