| Index: build.xml |
| =================================================================== |
| --- build.xml (revision 1367168) |
| +++ build.xml (working copy) |
| @@ -24,7 +24,7 @@ |
| </subant> |
| </target> |
| |
| - <target name="test" description="Test both Lucene and Solr"> |
| + <target name="test" description="Test both Lucene and Solr" depends="validate"> |
| <sequential> |
| <subant target="test" inheritall="false" failonerror="true"> |
| <fileset dir="lucene" includes="build.xml" /> |
| @@ -65,7 +65,7 @@ |
| </subant></sequential> |
| </target> |
| |
| - <target name="resolve" depends="clean-jars" description="Resolves all dependencies"> |
| + <target name="resolve" description="Resolves all dependencies"> |
| <sequential><subant target="resolve" inheritall="false" failonerror="true"> |
| <fileset dir="lucene" includes="build.xml" /> |
| <fileset dir="solr" includes="build.xml" /> |
| @@ -74,13 +74,19 @@ |
| |
| <target name="compile" description="Compile Lucene and Solr"> |
| <sequential> |
| - |
| <subant target="compile" inheritall="false" failonerror="true"> |
| <fileset dir="lucene" includes="build.xml" /> |
| <fileset dir="solr" includes="build.xml" /> |
| </subant> |
| </sequential> |
| </target> |
| + <target name="compile-core" description="Compile"> |
| + <sequential> |
| + <subant target="compile-core" inheritall="false" failonerror="true"> |
| + <fileset dir="lucene" includes="build.xml" /> |
| + </subant> |
| + </sequential> |
| + </target> |
| |
| <property name="version" value="5.0-SNAPSHOT"/> |
| <property name="maven-build-dir" value="maven-build"/> |
| @@ -116,7 +122,7 @@ |
| </sequential> |
| </target> |
| |
| - <target name="eclipse" depends="clean-jars, resolve" description="Setup Eclipse configuration"> |
| + <target name="eclipse" description="Setup Eclipse configuration" depends="resolve"> |
| <copy file="dev-tools/eclipse/dot.project" tofile=".project" overwrite="false"/> |
| <copy file="dev-tools/eclipse/dot.classpath" tofile=".classpath" overwrite="true"/> |
| <mkdir dir=".settings"/> |
| @@ -129,7 +135,7 @@ |
| </echo> |
| </target> |
| |
| - <target name="idea" depends="clean-jars, resolve" description="Setup IntelliJ IDEA configuration"> |
| + <target name="idea" description="Setup IntelliJ IDEA configuration" depends="resolve"> |
| <copy todir="."> |
| <fileset dir="dev-tools/idea"/> |
| </copy> |
| @@ -138,7 +144,6 @@ |
| File | Project Structure | Project | Project SDK. |
| </echo> |
| </target> |
| - |
| <target name="clean-idea" |
| description="Removes all IntelliJ IDEA configuration files"> |
| <delete dir=".idea" failonerror="true"/> |
| @@ -176,7 +181,7 @@ |
| </subant> |
| </target> |
| |
| - <target name="jar-checksums" depends="resolve" description="Recompute SHA1 checksums for all JAR files."> |
| + <target name="jar-checksums" description="Recompute SHA1 checksums for all JAR files."> |
| <delete> |
| <fileset dir="${basedir}"> |
| <include name="**/*.jar.sha1"/> |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java (revision 1367168) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java (working copy) |
| @@ -61,7 +61,6 @@ |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util._TestUtil; |
| -import org.apache.lucene.codecs.pfor.*; |
| |
| /** |
| * Randomly combines terms index impl w/ postings impls. |
| @@ -103,8 +102,6 @@ |
| final int baseBlockSize = _TestUtil.nextInt(random, 1, 127); |
| delegates.add(new MockVariableIntBlockPostingsFormat.MockIntFactory(baseBlockSize)); |
| // TODO: others |
| - delegates.add(new ForFactory()); |
| - delegates.add(new PForFactory()); |
| } |
| |
| private static String getExtension(String fileName) { |
| Index: lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (revision 1367168) |
| +++ lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (working copy) |
| @@ -283,8 +283,7 @@ |
| "MockVariableIntBlock", |
| "MockSep", |
| "MockRandom", |
| - "For", |
| - "PFor" |
| + "For" |
| )); |
| |
| // ----------------------------------------------------------------- |
| Index: lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java (revision 1367168) |
| +++ lucene/core/src/test/org/apache/lucene/codecs/pfor/TestPForUtil.java (working copy) |
| @@ -1,293 +0,0 @@ |
| -package org.apache.lucene.codecs.pfor; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.nio.ByteBuffer; |
| -import java.nio.IntBuffer; |
| -import java.util.Arrays; |
| -import java.util.Collections; |
| -import java.util.Locale; |
| -import java.util.Random; |
| - |
| -import org.apache.lucene.codecs.pfor.ForPostingsFormat; |
| -import org.apache.lucene.codecs.pfor.PForUtil; |
| -import org.apache.lucene.util.LuceneTestCase; |
| - |
| -/** |
| - * Test the core utility for PFor compress and decompress |
| - * We don't specially provide test case for For encoder/decoder, since |
| - * PFor is a extended version of For, and most methods will be reused |
| - * here. |
| - */ |
| -public class TestPForUtil extends LuceneTestCase { |
| - static final int[] MASK={ 0x00000000, |
| - 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, |
| - 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, |
| - 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff, |
| - 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, |
| - 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, |
| - 0x7fffffff, 0xffffffff}; |
| - Random gen; |
| - public void initRandom() { |
| - this.gen = random(); |
| - } |
| - |
| - /** |
| - * Should not encode extra information other than single int |
| - */ |
| - public void testAllEqual() throws Exception { |
| - initRandom(); |
| - int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE; |
| - int[] data=new int[sz]; |
| - byte[] res = new byte[sz*8]; |
| - int[] copy = new int[sz]; |
| - IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer(); |
| - int ensz; |
| - int header; |
| - |
| - Arrays.fill(data,gen.nextInt()); |
| - header = ForUtil.compress(data,resBuffer); // test For |
| - ensz = ForUtil.getEncodedSize(header); |
| - assert ensz == 4; |
| - |
| - ForUtil.decompress(resBuffer,copy,header); |
| - assert cmp(data,sz,copy,sz)==true; |
| - |
| - Arrays.fill(data,gen.nextInt()); |
| - header = PForUtil.compress(data,resBuffer); // test PFor |
| - ensz = PForUtil.getEncodedSize(header); |
| - assert ensz == 4; |
| - |
| - PForUtil.decompress(resBuffer,copy,header); |
| - assert cmp(data,sz,copy,sz)==true; |
| - } |
| - |
| - /** |
| - * Test correctness of forced exception. |
| - * the forced ones should exactly fit max chain |
| - */ |
| - public void testForcedExceptionDistance() throws Exception { |
| - initRandom(); |
| - int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE; |
| - int[] data=new int[sz]; |
| - byte[] res = new byte[sz*8]; |
| - int[] copy = new int[sz]; |
| - IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer(); |
| - int numBits = gen.nextInt(5)+1; |
| - |
| - int i,j; |
| - int pace, ensz, header; |
| - int expect, got; |
| - |
| - // fill exception value with same pace, there should |
| - // be no forced exceptions. |
| - createDistribution(data, sz, 1, MASK[numBits], MASK[numBits]); |
| - pace = 1<<numBits; |
| - for (i=0,j=0; i<sz; i+=pace) { |
| - int exc = gen.nextInt(); |
| - data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc; |
| - j++; |
| - } |
| - header = PForUtil.compress(data,resBuffer); |
| - ensz = PForUtil.getEncodedSize(header); |
| - expect = j; |
| - got = PForUtil.getExcNum(header); |
| - assert expect == got: expect+" expected but got "+got; |
| - |
| - // there should exactly one forced exception before each |
| - // exception when i>0 |
| - createDistribution(data, sz, 1, MASK[numBits], MASK[numBits]); |
| - pace = (1<<numBits)+1; |
| - for (i=0,j=0; i<sz; i+=pace) { |
| - int exc = gen.nextInt(); |
| - data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc; |
| - j++; |
| - } |
| - header = PForUtil.compress(data,resBuffer); |
| - ensz = PForUtil.getEncodedSize(header); |
| - expect = 2*(j-1)+1; |
| - got = PForUtil.getExcNum(header); |
| - assert expect == got: expect+" expected but got "+got; |
| - |
| - |
| - // two forced exception |
| - createDistribution(data, sz, 1, MASK[numBits], MASK[numBits]); |
| - pace = (1<<numBits)*2+1; |
| - for (i=0,j=0; i<sz; i+=pace) { |
| - int exc = gen.nextInt(); |
| - data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc; |
| - j++; |
| - } |
| - header = PForUtil.compress(data,resBuffer); |
| - ensz = PForUtil.getEncodedSize(header); |
| - expect = 3*(j-1)+1; |
| - got = PForUtil.getExcNum(header); |
| - assert expect == got: expect+" expected but got "+got; |
| - |
| - } |
| - /** |
| - * Test correctness of ignored forced exception. |
| - * The trailing forced exceptions should always be reverted |
| - * since they're not necessary. |
| - */ |
| - public void testTrailingForcedException() throws Exception { |
| - initRandom(); |
| - int sz=ForPostingsFormat.DEFAULT_BLOCK_SIZE; |
| - assert sz % 32 == 0; |
| - Integer[] buff= new Integer[sz]; |
| - int[] data = new int[sz]; |
| - int[] copy = new int[sz]; |
| - byte[] res = new byte[sz*8]; |
| - IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer(); |
| - |
| - int excIndex = gen.nextInt(sz/2); |
| - int excValue = gen.nextInt(); |
| - if ((excValue & 0xffff0000) == 0) { |
| - excValue |= 0xffff0000; // always prepare a 4 bytes exception |
| - } |
| - |
| - // make value of numFrameBits to be small, |
| - // thus easy to get forced exceptions |
| - for (int i=0; i<sz; ++i) { |
| - buff[i]=gen.nextInt() & 1; |
| - } |
| - // create only one value exception |
| - buff[excIndex]=excValue; |
| - |
| - for (int i=0; i<sz; ++i) |
| - data[i] = buff[i]; |
| - |
| - int header = PForUtil.compress(data,resBuffer); |
| - int ensz = PForUtil.getEncodedSize(header); |
| - |
| - assert (ensz <= sz*8): ensz+" > "+sz*8; // must not exceed the loose upperbound |
| - assert (ensz >= 4); // at least we have an exception, right? |
| - |
| - PForUtil.decompress(resBuffer,copy,header); |
| - |
| -// println(getHex(data,sz)+"\n"); |
| -// println(getHex(res,ensz)+"\n"); |
| -// println(getHex(copy,sz)+"\n"); |
| - |
| - // fetch the last int, i.e. last exception. |
| - int lastExc = (res[ensz-4] << 24) | |
| - ((0xff & res[ensz-3]) << 16) | |
| - ((0xff & res[ensz-2]) << 8 ) | |
| - (0xff & res[ensz-1]); |
| - |
| - // trailing forced exceptions are suppressed, |
| - // so the last exception should be what we assigned. |
| - assert lastExc==excValue; |
| - assert cmp(data,sz,copy,sz)==true; |
| - } |
| - |
| - /** |
| - * Test correctness of compressing and decompressing. |
| - * Here we randomly assign a rate of exception (i.e. 1-alpha), |
| - * and test different scale of normal/exception values. |
| - */ |
| - public void testAllDistribution() throws Exception { |
| - initRandom(); |
| - int sz = ForPostingsFormat.DEFAULT_BLOCK_SIZE; |
| - int[] data = new int[sz]; |
| - for (int i=0; i<=32; ++i) { // try to test every kinds of distribution |
| - double alpha=gen.nextDouble(); // rate of normal value |
| - for (int j=i; j<=32; ++j) { |
| - createDistribution(data,sz,alpha,MASK[i],MASK[j]); |
| - tryCompressAndDecompress(data, sz); |
| - } |
| - } |
| - } |
| - public void createDistribution(int[] data, int sz, double alpha, int masknorm, int maskexc) { |
| - Integer[] buff= new Integer[sz]; |
| - int i=0; |
| - for (; i<sz*alpha; ++i) |
| - buff[i]=gen.nextInt() & masknorm; |
| - for (; i<sz; ++i) |
| - buff[i]=gen.nextInt() & maskexc; |
| - Collections.shuffle(Arrays.asList(buff),gen); |
| - for (i=0; i<sz; ++i) |
| - data[i] = buff[i]; |
| - } |
| - public void tryCompressAndDecompress(final int[] data, int sz) throws Exception { |
| - byte[] res = new byte[sz*8]; // loosely upperbound |
| - IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer(); |
| - |
| - int header = PForUtil.compress(data,resBuffer); |
| - int ensz = PForUtil.getEncodedSize(header); |
| - |
| - assert (ensz <= sz*8); // must not exceed the loose upperbound |
| - |
| - int[] copy = new int[sz]; |
| - PForUtil.decompress(resBuffer,copy,header); |
| - |
| -// println(getHex(data,sz)+"\n"); |
| -// println(getHex(res,ensz)+"\n"); |
| -// println(getHex(copy,sz)+"\n"); |
| - |
| - assert cmp(data,sz,copy,sz)==true; |
| - } |
| - public boolean cmp(int[] a, int sza, int[] b, int szb) { |
| - if (sza!=szb) |
| - return false; |
| - for (int i=0; i<sza; ++i) { |
| - if (a[i]!=b[i]) { |
| - System.err.println(String.format(Locale.ENGLISH, "! %08x != %08x in %d",a[i],b[i],i)); |
| - return false; |
| - } |
| - } |
| - return true; |
| - } |
| - public static String getHex( byte [] raw, int sz ) { |
| - final String HEXES = "0123456789ABCDEF"; |
| - if ( raw == null ) { |
| - return null; |
| - } |
| - final StringBuilder hex = new StringBuilder( 2 * raw.length ); |
| - for ( int i=0; i<sz; i++ ) { |
| - if (i>0 && (i)%16 == 0) |
| - hex.append("\n"); |
| - byte b=raw[i]; |
| - hex.append(HEXES.charAt((b & 0xF0) >> 4)) |
| - .append(HEXES.charAt((b & 0x0F))) |
| - .append(" "); |
| - } |
| - return hex.toString(); |
| - } |
| - public static String getHex( int [] raw, int sz ) { |
| - if ( raw == null ) { |
| - return null; |
| - } |
| - final StringBuilder hex = new StringBuilder( 4 * raw.length ); |
| - for ( int i=0; i<sz; i++ ) { |
| - if (i>0 && i%8 == 0) |
| - hex.append("\n"); |
| - hex.append(String.format(Locale.ENGLISH, "%08x ",raw[i])); |
| - } |
| - return hex.toString(); |
| - } |
| - static void eprintln(String format, Object... args) { |
| - System.err.println(String.format(Locale.ENGLISH, format,args)); |
| - } |
| - static void println(String format, Object... args) { |
| - System.out.println(String.format(Locale.ENGLISH, format,args)); |
| - } |
| - static void print(String format, Object... args) { |
| - System.out.print(String.format(Locale.ENGLISH, format,args)); |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java (revision 1367168) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java (working copy) |
| @@ -20,6 +20,7 @@ |
| import java.io.IOException; |
| import java.nio.ByteBuffer; |
| import java.nio.IntBuffer; |
| +import java.nio.LongBuffer; |
| import java.util.ArrayList; |
| import java.util.List; |
| |
| @@ -78,14 +79,20 @@ |
| private long posTermStartFP; |
| private long payTermStartFP; |
| |
| - final int[] docDeltaBuffer; |
| - final int[] freqBuffer; |
| + final long[] docDeltaBuffer; |
| + final long[] freqBuffer; |
| + final LongBuffer docDeltaLBuffer; |
| + final LongBuffer freqLBuffer; |
| private int docBufferUpto; |
| |
| - final int[] posDeltaBuffer; |
| - final int[] payloadLengthBuffer; |
| - final int[] offsetStartDeltaBuffer; |
| - final int[] offsetLengthBuffer; |
| + final long[] posDeltaBuffer; |
| + final long[] payloadLengthBuffer; |
| + final long[] offsetStartDeltaBuffer; |
| + final long[] offsetLengthBuffer; |
| + final LongBuffer posDeltaLBuffer; |
| + final LongBuffer payloadLengthLBuffer; |
| + final LongBuffer offsetStartDeltaLBuffer; |
| + final LongBuffer offsetLengthLBuffer; |
| private int posBufferUpto; |
| |
| private byte[] payloadBytes; |
| @@ -120,25 +127,32 @@ |
| try { |
| CodecUtil.writeHeader(docOut, DOC_CODEC, VERSION_CURRENT); |
| if (state.fieldInfos.hasProx()) { |
| - posDeltaBuffer = new int[blockSize]; |
| + posDeltaBuffer = new long[blockSize]; |
| + posDeltaLBuffer = LongBuffer.wrap(posDeltaBuffer); |
| posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.POS_EXTENSION), |
| state.context); |
| CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT); |
| |
| if (state.fieldInfos.hasPayloads()) { |
| payloadBytes = new byte[128]; |
| - payloadLengthBuffer = new int[blockSize]; |
| + payloadLengthBuffer = new long[blockSize]; |
| + payloadLengthLBuffer = LongBuffer.wrap(payloadLengthBuffer); |
| } else { |
| payloadBytes = null; |
| payloadLengthBuffer = null; |
| + payloadLengthLBuffer = null; |
| } |
| |
| if (state.fieldInfos.hasOffsets()) { |
| - offsetStartDeltaBuffer = new int[blockSize]; |
| - offsetLengthBuffer = new int[blockSize]; |
| + offsetStartDeltaBuffer = new long[blockSize]; |
| + offsetLengthBuffer = new long[blockSize]; |
| + offsetStartDeltaLBuffer = LongBuffer.wrap(offsetStartDeltaBuffer); |
| + offsetLengthLBuffer = LongBuffer.wrap(offsetLengthBuffer); |
| } else { |
| offsetStartDeltaBuffer = null; |
| offsetLengthBuffer = null; |
| + offsetStartDeltaLBuffer = null; |
| + offsetLengthLBuffer = null; |
| } |
| |
| if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) { |
| @@ -152,6 +166,10 @@ |
| offsetStartDeltaBuffer = null; |
| offsetLengthBuffer = null; |
| payloadBytes = null; |
| + posDeltaLBuffer = null; |
| + payloadLengthLBuffer = null; |
| + offsetStartDeltaLBuffer = null; |
| + offsetLengthLBuffer = null; |
| } |
| this.payOut = payOut; |
| this.posOut = posOut; |
| @@ -162,8 +180,10 @@ |
| } |
| } |
| |
| - docDeltaBuffer = new int[blockSize]; |
| - freqBuffer = new int[blockSize]; |
| + docDeltaBuffer = new long[blockSize]; |
| + freqBuffer = new long[blockSize]; |
| + docDeltaLBuffer = LongBuffer.wrap(docDeltaBuffer); |
| + freqLBuffer = LongBuffer.wrap(freqBuffer); |
| |
| skipWriter = new BlockSkipWriter(blockSize, |
| maxSkipLevels, |
| @@ -172,7 +192,7 @@ |
| posOut, |
| payOut); |
| |
| - encoded = new byte[blockSize*4 + 4]; |
| + encoded = new byte[blockSize*4]; |
| encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); |
| } |
| |
| @@ -210,9 +230,8 @@ |
| skipWriter.resetSkip(); |
| } |
| |
| - private void writeBlock(int[] buffer, IndexOutput out) throws IOException { |
| + private void writeBlock(LongBuffer buffer, IndexOutput out) throws IOException { |
| final int header = ForUtil.compress(buffer, encodedBuffer); |
| - //System.out.println(" block has " + numBytes + " bytes"); |
| out.writeVInt(header); |
| out.writeBytes(encoded, ForUtil.getEncodedSize(header)); |
| } |
| @@ -278,12 +297,12 @@ |
| if (DEBUG) { |
| System.out.println(" write docDelta block @ fp=" + docOut.getFilePointer()); |
| } |
| - writeBlock(docDeltaBuffer, docOut); |
| + writeBlock(docDeltaLBuffer, docOut); |
| if (fieldHasFreqs) { |
| if (DEBUG) { |
| System.out.println(" write freq block @ fp=" + docOut.getFilePointer()); |
| } |
| - writeBlock(freqBuffer, docOut); |
| + writeBlock(freqLBuffer, docOut); |
| } |
| docBufferUpto = 0; |
| } |
| @@ -327,17 +346,17 @@ |
| if (DEBUG) { |
| System.out.println(" write pos bulk block @ fp=" + posOut.getFilePointer()); |
| } |
| - writeBlock(posDeltaBuffer, posOut); |
| + writeBlock(posDeltaLBuffer, posOut); |
| |
| if (fieldHasPayloads) { |
| - writeBlock(payloadLengthBuffer, payOut); |
| + writeBlock(payloadLengthLBuffer, payOut); |
| payOut.writeVInt(payloadByteUpto); |
| payOut.writeBytes(payloadBytes, 0, payloadByteUpto); |
| payloadByteUpto = 0; |
| } |
| if (fieldHasOffsets) { |
| - writeBlock(offsetStartDeltaBuffer, payOut); |
| - writeBlock(offsetLengthBuffer, payOut); |
| + writeBlock(offsetStartDeltaLBuffer, payOut); |
| + writeBlock(offsetLengthLBuffer, payOut); |
| } |
| posBufferUpto = 0; |
| } |
| @@ -400,8 +419,8 @@ |
| |
| // vInt encode the remaining doc deltas and freqs: |
| for(int i=0;i<docBufferUpto;i++) { |
| - final int docDelta = docDeltaBuffer[i]; |
| - final int freq = freqBuffer[i]; |
| + final int docDelta = (int)docDeltaBuffer[i]; |
| + final int freq = (int)freqBuffer[i]; |
| if (!fieldHasFreqs) { |
| docOut.writeVInt(docDelta); |
| } else if (freqBuffer[i] == 1) { |
| @@ -439,9 +458,9 @@ |
| int lastPayloadLength = -1; |
| int payloadBytesReadUpto = 0; |
| for(int i=0;i<posBufferUpto;i++) { |
| - final int posDelta = posDeltaBuffer[i]; |
| + final int posDelta = (int)posDeltaBuffer[i]; |
| if (fieldHasPayloads) { |
| - final int payloadLength = payloadLengthBuffer[i]; |
| + final int payloadLength = (int)payloadLengthBuffer[i]; |
| if (payloadLength != lastPayloadLength) { |
| lastPayloadLength = payloadLength; |
| posOut.writeVInt((posDelta<<1)|1); |
| @@ -469,8 +488,8 @@ |
| if (DEBUG) { |
| System.out.println(" write offset @ pos.fp=" + posOut.getFilePointer()); |
| } |
| - posOut.writeVInt(offsetStartDeltaBuffer[i]); |
| - posOut.writeVInt(offsetLengthBuffer[i]); |
| + posOut.writeVInt((int)offsetStartDeltaBuffer[i]); |
| + posOut.writeVInt((int)offsetLengthBuffer[i]); |
| } |
| } |
| |
| Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java (revision 1367168) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java (working copy) |
| @@ -20,6 +20,7 @@ |
| import java.io.IOException; |
| import java.nio.ByteBuffer; |
| import java.nio.IntBuffer; |
| +import java.nio.LongBuffer; |
| |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.CodecUtil; |
| @@ -118,10 +119,10 @@ |
| } |
| } |
| |
| - static void readBlock(IndexInput in, byte[] encoded, IntBuffer encodedBuffer, int[] buffer) throws IOException { |
| + static void readBlock(IndexInput in, byte[] encoded, IntBuffer encodedBuffer, LongBuffer buffer) throws IOException { |
| int header = in.readVInt(); |
| in.readBytes(encoded, 0, ForUtil.getEncodedSize(header)); |
| - ForUtil.decompress(encodedBuffer, buffer, header); |
| + ForUtil.decompress(buffer, encodedBuffer, header); |
| } |
| |
| static void skipBlock(IndexInput in) throws IOException { |
| @@ -303,8 +304,10 @@ |
| private final byte[] encoded; |
| private final IntBuffer encodedBuffer; |
| |
| - private final int[] docDeltaBuffer = new int[blockSize]; |
| - private final int[] freqBuffer = new int[blockSize]; |
| + private final long[] docDeltaBuffer = new long[blockSize]; |
| + private final long[] freqBuffer = new long[blockSize]; |
| + private final LongBuffer docDeltaLBuffer = LongBuffer.wrap(docDeltaBuffer); |
| + private final LongBuffer freqLBuffer = LongBuffer.wrap(freqBuffer); |
| |
| private int docBufferUpto; |
| |
| @@ -342,7 +345,7 @@ |
| indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; |
| indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; |
| indexHasPayloads = fieldInfo.hasPayloads(); |
| - encoded = new byte[blockSize*4 + 4]; |
| + encoded = new byte[blockSize*4]; |
| encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); |
| } |
| |
| @@ -389,13 +392,13 @@ |
| if (DEBUG) { |
| System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); |
| } |
| - readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer); |
| + readBlock(docIn, encoded, encodedBuffer, docDeltaLBuffer); |
| |
| if (indexHasFreq) { |
| if (DEBUG) { |
| System.out.println(" fill freq block from fp=" + docIn.getFilePointer()); |
| } |
| - readBlock(docIn, encoded, encodedBuffer, freqBuffer); |
| + readBlock(docIn, encoded, encodedBuffer, freqLBuffer); |
| } |
| } else { |
| // Read vInts: |
| @@ -450,7 +453,7 @@ |
| |
| if (liveDocs == null || liveDocs.get(accum)) { |
| doc = accum; |
| - freq = freqBuffer[docBufferUpto]; |
| + freq = (int)freqBuffer[docBufferUpto]; |
| docBufferUpto++; |
| if (DEBUG) { |
| System.out.println(" return doc=" + doc + " freq=" + freq); |
| @@ -541,10 +544,15 @@ |
| private final byte[] encoded; |
| private final IntBuffer encodedBuffer; |
| |
| - private final int[] docDeltaBuffer = new int[blockSize]; |
| - private final int[] freqBuffer = new int[blockSize]; |
| - private final int[] posDeltaBuffer = new int[blockSize]; |
| + private final long[] docDeltaBuffer = new long[blockSize]; |
| + private final long[] freqBuffer = new long[blockSize]; |
| + private final long[] posDeltaBuffer = new long[blockSize]; |
| |
| + |
| + private final LongBuffer docDeltaLBuffer = LongBuffer.wrap(docDeltaBuffer); |
| + private final LongBuffer freqLBuffer = LongBuffer.wrap(freqBuffer); |
| + private final LongBuffer posDeltaLBuffer = LongBuffer.wrap(posDeltaBuffer); |
| + |
| private int docBufferUpto; |
| private int posBufferUpto; |
| |
| @@ -600,7 +608,7 @@ |
| this.startDocIn = BlockPostingsReader.this.docIn; |
| this.docIn = (IndexInput) startDocIn.clone(); |
| this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone(); |
| - encoded = new byte[blockSize*4 + 4]; |
| + encoded = new byte[blockSize*4]; |
| encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); |
| indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; |
| indexHasPayloads = fieldInfo.hasPayloads(); |
| @@ -660,13 +668,13 @@ |
| System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); |
| } |
| |
| - readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer); |
| + readBlock(docIn, encoded, encodedBuffer, docDeltaLBuffer); |
| |
| if (DEBUG) { |
| System.out.println(" fill freq block from fp=" + docIn.getFilePointer()); |
| } |
| |
| - readBlock(docIn, encoded, encodedBuffer, freqBuffer); |
| + readBlock(docIn, encoded, encodedBuffer, freqLBuffer); |
| } else { |
| // Read vInts: |
| if (DEBUG) { |
| @@ -718,7 +726,7 @@ |
| if (DEBUG) { |
| System.out.println(" bulk pos block @ fp=" + posIn.getFilePointer()); |
| } |
| - readBlock(posIn, encoded, encodedBuffer, posDeltaBuffer); |
| + readBlock(posIn, encoded, encodedBuffer, posDeltaLBuffer); |
| } |
| } |
| |
| @@ -745,8 +753,8 @@ |
| if (DEBUG) { |
| System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]); |
| } |
| - accum += docDeltaBuffer[docBufferUpto]; |
| - freq = freqBuffer[docBufferUpto]; |
| + accum += (int)docDeltaBuffer[docBufferUpto]; |
| + freq = (int)freqBuffer[docBufferUpto]; |
| posPendingCount += freq; |
| docBufferUpto++; |
| docUpto++; |
| @@ -907,7 +915,7 @@ |
| refillPositions(); |
| posBufferUpto = 0; |
| } |
| - position += posDeltaBuffer[posBufferUpto++]; |
| + position += (int)posDeltaBuffer[posBufferUpto++]; |
| posPendingCount--; |
| if (DEBUG) { |
| System.out.println(" return pos=" + position); |
| @@ -942,14 +950,23 @@ |
| private final byte[] encoded; |
| private final IntBuffer encodedBuffer; |
| |
| - private final int[] docDeltaBuffer = new int[blockSize]; |
| - private final int[] freqBuffer = new int[blockSize]; |
| - private final int[] posDeltaBuffer = new int[blockSize]; |
| + private final long[] docDeltaBuffer = new long[blockSize]; |
| + private final long[] freqBuffer = new long[blockSize]; |
| + private final long[] posDeltaBuffer = new long[blockSize]; |
| |
| - private final int[] payloadLengthBuffer; |
| - private final int[] offsetStartDeltaBuffer; |
| - private final int[] offsetLengthBuffer; |
| + private final long[] payloadLengthBuffer; |
| + private final long[] offsetStartDeltaBuffer; |
| + private final long[] offsetLengthBuffer; |
| |
| + |
| + private final LongBuffer docDeltaLBuffer = LongBuffer.wrap(docDeltaBuffer); |
| + private final LongBuffer freqLBuffer = LongBuffer.wrap(freqBuffer); |
| + private final LongBuffer posDeltaLBuffer = LongBuffer.wrap(posDeltaBuffer); |
| + |
| + private final LongBuffer payloadLengthLBuffer; |
| + private final LongBuffer offsetStartDeltaLBuffer; |
| + private final LongBuffer offsetLengthLBuffer; |
| + |
| private byte[] payloadBytes; |
| private int payloadByteUpto; |
| private int payloadLength; |
| @@ -1020,26 +1037,32 @@ |
| this.docIn = (IndexInput) startDocIn.clone(); |
| this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone(); |
| this.payIn = (IndexInput) BlockPostingsReader.this.payIn.clone(); |
| - encoded = new byte[blockSize*4 + 4]; |
| + encoded = new byte[blockSize*4]; |
| encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); |
| indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; |
| if (indexHasOffsets) { |
| - offsetStartDeltaBuffer = new int[blockSize]; |
| - offsetLengthBuffer = new int[blockSize]; |
| + offsetStartDeltaBuffer = new long[blockSize]; |
| + offsetLengthBuffer = new long[blockSize]; |
| + offsetStartDeltaLBuffer = LongBuffer.wrap(offsetStartDeltaBuffer); |
| + offsetLengthLBuffer = LongBuffer.wrap(offsetLengthBuffer); |
| } else { |
| offsetStartDeltaBuffer = null; |
| + offsetStartDeltaLBuffer = null; |
| offsetLengthBuffer = null; |
| + offsetLengthLBuffer = null; |
| startOffset = -1; |
| endOffset = -1; |
| } |
| |
| indexHasPayloads = fieldInfo.hasPayloads(); |
| if (indexHasPayloads) { |
| - payloadLengthBuffer = new int[blockSize]; |
| + payloadLengthBuffer = new long[blockSize]; |
| + payloadLengthLBuffer = LongBuffer.wrap(payloadLengthBuffer); |
| payloadBytes = new byte[128]; |
| payload = new BytesRef(); |
| } else { |
| payloadLengthBuffer = null; |
| + payloadLengthLBuffer = null; |
| payloadBytes = null; |
| payload = null; |
| } |
| @@ -1100,13 +1123,13 @@ |
| System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); |
| } |
| |
| - readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer); |
| + readBlock(docIn, encoded, encodedBuffer, docDeltaLBuffer); |
| |
| if (DEBUG) { |
| System.out.println(" fill freq block from fp=" + docIn.getFilePointer()); |
| } |
| |
| - readBlock(docIn, encoded, encodedBuffer, freqBuffer); |
| + readBlock(docIn, encoded, encodedBuffer, freqLBuffer); |
| } else { |
| // Read vInts: |
| if (DEBUG) { |
| @@ -1175,13 +1198,13 @@ |
| if (DEBUG) { |
| System.out.println(" bulk pos block @ fp=" + posIn.getFilePointer()); |
| } |
| - readBlock(posIn, encoded, encodedBuffer, posDeltaBuffer); |
| + readBlock(posIn, encoded, encodedBuffer, posDeltaLBuffer); |
| |
| if (indexHasPayloads) { |
| if (DEBUG) { |
| System.out.println(" bulk payload block @ pay.fp=" + payIn.getFilePointer()); |
| } |
| - readBlock(payIn, encoded, encodedBuffer, payloadLengthBuffer); |
| + readBlock(payIn, encoded, encodedBuffer, payloadLengthLBuffer); |
| int numBytes = payIn.readVInt(); |
| if (DEBUG) { |
| System.out.println(" " + numBytes + " payload bytes @ pay.fp=" + payIn.getFilePointer()); |
| @@ -1197,8 +1220,8 @@ |
| if (DEBUG) { |
| System.out.println(" bulk offset block @ pay.fp=" + payIn.getFilePointer()); |
| } |
| - readBlock(payIn, encoded, encodedBuffer, offsetStartDeltaBuffer); |
| - readBlock(payIn, encoded, encodedBuffer, offsetLengthBuffer); |
| + readBlock(payIn, encoded, encodedBuffer, offsetStartDeltaLBuffer); |
| + readBlock(payIn, encoded, encodedBuffer, offsetLengthLBuffer); |
| } |
| } |
| } |
| @@ -1231,8 +1254,8 @@ |
| if (DEBUG) { |
| System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]); |
| } |
| - accum += docDeltaBuffer[docBufferUpto]; |
| - freq = freqBuffer[docBufferUpto]; |
| + accum += (int)docDeltaBuffer[docBufferUpto]; |
| + freq = (int)freqBuffer[docBufferUpto]; |
| posPendingCount += freq; |
| docBufferUpto++; |
| docUpto++; |
| @@ -1383,8 +1406,8 @@ |
| if (indexHasOffsets) { |
| // Must load offset blocks merely to sum |
| // up into lastEndOffset: |
| - readBlock(payIn, encoded, encodedBuffer, offsetStartDeltaBuffer); |
| - readBlock(payIn, encoded, encodedBuffer, offsetLengthBuffer); |
| + readBlock(payIn, encoded, encodedBuffer, offsetStartDeltaLBuffer); |
| + readBlock(payIn, encoded, encodedBuffer, offsetLengthLBuffer); |
| for(int i=0;i<blockSize;i++) { |
| lastEndOffset += offsetStartDeltaBuffer[i] + offsetLengthBuffer[i]; |
| } |
| @@ -1456,15 +1479,15 @@ |
| refillPositions(); |
| posBufferUpto = 0; |
| } |
| - position += posDeltaBuffer[posBufferUpto]; |
| + position += (int)posDeltaBuffer[posBufferUpto]; |
| |
| if (indexHasPayloads) { |
| - payloadLength = payloadLengthBuffer[posBufferUpto]; |
| + payloadLength = (int)payloadLengthBuffer[posBufferUpto]; |
| } |
| |
| if (indexHasOffsets) { |
| - startOffset = lastEndOffset + offsetStartDeltaBuffer[posBufferUpto]; |
| - endOffset = startOffset + offsetLengthBuffer[posBufferUpto]; |
| + startOffset = lastEndOffset + (int)offsetStartDeltaBuffer[posBufferUpto]; |
| + endOffset = startOffset + (int)offsetLengthBuffer[posBufferUpto]; |
| lastEndOffset = endOffset; |
| } |
| |
| Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java (revision 1367168) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java (working copy) |
| @@ -1,129 +0,0 @@ |
| -package org.apache.lucene.codecs.pfor; |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.nio.ByteBuffer; |
| -import java.nio.IntBuffer; |
| - |
| -import org.apache.lucene.util.IOUtils; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.store.IndexOutput; |
| -import org.apache.lucene.codecs.sep.IntStreamFactory; |
| -import org.apache.lucene.codecs.sep.IntIndexInput; |
| -import org.apache.lucene.codecs.sep.IntIndexOutput; |
| -import org.apache.lucene.codecs.intblock.FixedIntBlockIndexInput; |
| -import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput; |
| - |
| -/** |
| - * Used to plug to PostingsReader/WriterBase. |
| - * Encoder and decoder in lower layers are called by |
| - * flushBlock() and readBlock() |
| - */ |
| - |
| -public final class PForFactory extends IntStreamFactory { |
| - |
| - public PForFactory() { |
| - } |
| - |
| - @Override |
| - public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException { |
| - boolean success = false; |
| - IndexOutput out = dir.createOutput(fileName, context); |
| - try { |
| - IntIndexOutput ret = new PForIndexOutput(out); |
| - success = true; |
| - return ret; |
| - } finally { |
| - if (!success) { |
| - // For some cases (e.g. disk full), the IntIndexOutput may not be |
| - // properly created. So we should close those opened files. |
| - IOUtils.closeWhileHandlingException(out); |
| - } |
| - } |
| - } |
| - |
| - @Override |
| - public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException { |
| - return new PForIndexInput(dir.openInput(fileName, context)); |
| - } |
| - |
| - /** |
| - * Here we'll hold both input buffer and output buffer for |
| - * encoder/decoder. |
| - */ |
| - private class PForIndexInput extends FixedIntBlockIndexInput { |
| - |
| - PForIndexInput(final IndexInput in) throws IOException { |
| - super(in); |
| - } |
| - |
| - class PForBlockReader implements FixedIntBlockIndexInput.BlockReader { |
| - private final byte[] encoded; |
| - private final int[] buffer; |
| - private final IndexInput in; |
| - private final IntBuffer encodedBuffer; |
| - |
| - PForBlockReader(final IndexInput in, final int[] buffer) { |
| - // upperbound for encoded value should include(here header is not buffered): |
| - // 1. blockSize of normal value (4x bytes); |
| - // 2. blockSize of exception value (4x bytes); |
| - this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8]; |
| - this.in = in; |
| - this.buffer = buffer; |
| - this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); |
| - } |
| - |
| - // TODO: implement public void skipBlock() {} ? |
| - @Override |
| - public void readBlock() throws IOException { |
| - final int header = in.readInt(); |
| - final int numBytes = PForUtil.getEncodedSize(header); |
| - assert numBytes <= PForPostingsFormat.DEFAULT_BLOCK_SIZE*8; |
| - in.readBytes(encoded,0,numBytes); |
| - PForUtil.decompress(encodedBuffer,buffer,header); |
| - } |
| - } |
| - |
| - @Override |
| - protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException { |
| - return new PForBlockReader(in,buffer); |
| - } |
| - } |
| - |
| - private class PForIndexOutput extends FixedIntBlockIndexOutput { |
| - private final byte[] encoded; |
| - private final IntBuffer encodedBuffer; |
| - |
| - PForIndexOutput(IndexOutput out) throws IOException { |
| - super(out, PForPostingsFormat.DEFAULT_BLOCK_SIZE); |
| - this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8]; |
| - this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer(); |
| - } |
| - |
| - @Override |
| - protected void flushBlock() throws IOException { |
| - final int header = PForUtil.compress(buffer,encodedBuffer); |
| - final int numBytes = PForUtil.getEncodedSize(header); |
| - // nocommit writeVInt instead? |
| - out.writeInt(header); |
| - out.writeBytes(encoded, numBytes); |
| - } |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java (revision 1367168) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java (working copy) |
| @@ -1,128 +0,0 @@ |
| -package org.apache.lucene.codecs.pfor; |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.nio.ByteBuffer; |
| -import java.nio.IntBuffer; |
| - |
| -import org.apache.lucene.util.IOUtils; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.store.IndexOutput; |
| -import org.apache.lucene.codecs.sep.IntStreamFactory; |
| -import org.apache.lucene.codecs.sep.IntIndexInput; |
| -import org.apache.lucene.codecs.sep.IntIndexOutput; |
| -import org.apache.lucene.codecs.intblock.FixedIntBlockIndexInput; |
| -import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput; |
| - |
| -/** |
| - * Used to plug to PostingsReader/WriterBase. |
| - * Encoder and decoder in lower layers are called by |
| - * flushBlock() and readBlock() |
| - */ |
| - |
| -public final class ForFactory extends IntStreamFactory { |
| - |
| - public ForFactory() { |
| - } |
| - |
| - @Override |
| - public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException { |
| - boolean success = false; |
| - IndexOutput out = dir.createOutput(fileName, context); |
| - try { |
| - IntIndexOutput ret = new ForIndexOutput(out); |
| - success = true; |
| - return ret; |
| - } finally { |
| - if (!success) { |
| - // For some cases (e.g. disk full), the IntIndexOutput may not be |
| - // properly created. So we should close those opened files. |
| - IOUtils.closeWhileHandlingException(out); |
| - } |
| - } |
| - } |
| - |
| - @Override |
| - public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException { |
| - return new ForIndexInput(dir.openInput(fileName, context)); |
| - } |
| - |
| - /** |
| - * Here we'll hold both input buffer and output buffer for |
| - * encoder/decoder. |
| - */ |
| - private class ForIndexInput extends FixedIntBlockIndexInput { |
| - |
| - ForIndexInput(final IndexInput in) throws IOException { |
| - super(in); |
| - } |
| - |
| - class ForBlockReader implements FixedIntBlockIndexInput.BlockReader { |
| - private final byte[] encoded; |
| - private final int[] buffer; |
| - private final IndexInput in; |
| - private final IntBuffer encodedBuffer; |
| - |
| - ForBlockReader(final IndexInput in, final int[] buffer) { |
| - // upperbound for encoded value should include(here header is not buffered): |
| - // blockSize of normal value when numFrameBits=32(4x bytes); |
| - this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4]; |
| - this.in = in; |
| - this.buffer = buffer; |
| - this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); |
| - } |
| - |
| - // TODO: implement public void skipBlock() {} ? |
| - @Override |
| - public void readBlock() throws IOException { |
| - final int header = in.readInt(); |
| - final int numBytes = ForUtil.getEncodedSize(header); |
| - assert numBytes <= ForPostingsFormat.DEFAULT_BLOCK_SIZE*4; |
| - in.readBytes(encoded,0,numBytes); |
| - ForUtil.decompress(encodedBuffer,buffer,header); |
| - } |
| - } |
| - |
| - @Override |
| - protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException { |
| - return new ForBlockReader(in,buffer); |
| - } |
| - } |
| - |
| - private class ForIndexOutput extends FixedIntBlockIndexOutput { |
| - private final byte[] encoded; |
| - private final IntBuffer encodedBuffer; |
| - |
| - ForIndexOutput(IndexOutput out) throws IOException { |
| - super(out,ForPostingsFormat.DEFAULT_BLOCK_SIZE); |
| - this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4]; |
| - this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer(); |
| - } |
| - |
| - @Override |
| - protected void flushBlock() throws IOException { |
| - final int header = ForUtil.compress(buffer,encodedBuffer); |
| - final int numBytes = ForUtil.getEncodedSize(header); |
| - // nocommit writeVInt instead? |
| - out.writeInt(header); |
| - out.writeBytes(encoded, numBytes); |
| - } |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java (revision 1367168) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java (working copy) |
| @@ -1,343 +0,0 @@ |
| -package org.apache.lucene.codecs.pfor; |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.nio.IntBuffer; |
| -import java.nio.ByteBuffer; |
| -import java.util.Arrays; |
| - |
| -/** |
| - * Encode all small values and exception pointers in normal area; |
| - * Encode large values in exception area; |
| - * Size per exception is variable, possibly: 1byte, 2bytes, or 4bytes |
| - */ |
| -public final class PForUtil extends ForUtil { |
| - |
| - protected static final int[] PER_EXCEPTION_SIZE = {1,2,4}; |
| - |
| - /** Compress given int[] into Integer buffer, with PFor format |
| - * |
| - * @param data uncompressed data |
| - * @param intBuffer integer buffer to hold compressed data |
| - * @return block header |
| - */ |
| - public static int compress(final int[] data, IntBuffer intBuffer) { |
| - /** estimate minimum compress size to determine numFrameBits */ |
| - int numBits=getNumBits(data); |
| - if (numBits == 0) { |
| - return compressDuplicateBlock(data,intBuffer); |
| - } |
| - |
| - int size = data.length; |
| - int[] excValues = new int[size]; |
| - int excNum = 0, excLastPos = -1, excFirstPos = -1, excLastNonForcePos = -1; |
| - |
| - // num of exception until the last non-forced exception |
| - int excNumBase = 0; |
| - |
| - // bytes per exception |
| - int excBytes = 1; |
| - |
| - // bytes before exception area, e.g. header and normal area |
| - int excByteOffset = 0; |
| - |
| - // the max value possible for current exception pointer, |
| - // value of the first pointer is limited by header as 254 |
| - // (first exception ranges from -1 ~ 254) |
| - long maxChainFirst = 254; |
| - long maxChain = maxChainFirst + 1; |
| - |
| - boolean conValue, conForce, conEnd; |
| - int i=0; |
| - |
| - /** estimate exceptions */ |
| - for (i=0; i<size; ++i) { |
| - conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception |
| - conForce = (i >= maxChain + excLastPos); // force exception |
| - if (conValue || conForce) { |
| - excValues[excNum++] = data[i]; |
| - if (excLastPos == -1) { |
| - maxChain = 1L<<numBits; |
| - excFirstPos = i; |
| - } |
| - if (conValue) { |
| - excLastNonForcePos = i; |
| - excNumBase = excNum; |
| - } |
| - excLastPos = i; |
| - } |
| - } |
| - |
| - /** encode normal area, record exception positions */ |
| - excNum = 0; |
| - if (excFirstPos < 0) { // no exception |
| - for (i=0; i<size; ++i) { |
| - encodeNormalValue(intBuffer,i,data[i], numBits); |
| - } |
| - excLastPos = -1; |
| - } else { |
| - for (i=0; i<excFirstPos; ++i) { |
| - encodeNormalValue(intBuffer,i,data[i], numBits); |
| - } |
| - maxChain = 1L<<numBits; |
| - excLastPos = excFirstPos; |
| - excNum = i<size? 1:0; |
| - for (i=excFirstPos+1; i<size; ++i) { |
| - conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception |
| - conForce = (i >= maxChain + excLastPos); // force exception |
| - conEnd = (excNum == excNumBase); // following forced ignored |
| - if ((!conValue && !conForce) || conEnd) { |
| - encodeNormalValue(intBuffer,i,data[i], numBits); |
| - } else { |
| - encodeNormalValue(intBuffer, excLastPos, i-excLastPos-1, numBits); |
| - excNum++; |
| - excLastPos = i; |
| - } |
| - } |
| - } |
| - |
| - /** encode exception area */ |
| - for (i=0; i<excNum; ++i) { |
| - if (excBytes < 2 && (excValues[i] & ~MASK[8]) != 0) { |
| - excBytes=2; |
| - } |
| - if (excBytes < 4 && (excValues[i] & ~MASK[16]) != 0) { |
| - excBytes=4; |
| - } |
| - } |
| - excByteOffset = (size*numBits + 7)/8; |
| - encodeExcValues(intBuffer, excValues, excNum, excBytes, excByteOffset); |
| - |
| - /** encode header */ |
| - int encodedSize = (excByteOffset + excBytes*excNum + 3)/4; |
| - |
| - return getHeader(encodedSize, numBits, excNum, excFirstPos, excBytes); |
| - } |
| - |
| - /** Decompress given Integer buffer into int array. |
| - * |
| - * @param intBuffer integer buffer to hold compressed data |
| - * @param data int array to hold uncompressed data |
| - */ |
| - public static void decompress(IntBuffer intBuffer, int[] data, int header) { |
| - // since this buffer is reused at upper level, rewind first |
| - intBuffer.rewind(); |
| - |
| - int excNum = ((header >> 8) & MASK[8]) + 1; |
| - int excFirstPos = ((header >> 16) & MASK[8]) - 1; |
| - int excBytes = PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]]; |
| - int numBits = ((header >> 24) & MASK[6]); |
| - |
| - decompressCore(intBuffer, data, numBits); |
| - |
| - patchException(intBuffer,data,excNum,excFirstPos,excBytes); |
| - } |
| - |
| - /** |
| - * Encode exception values into exception area. |
| - * The width for each exception will be fixed as: |
| - * 1, 2, or 4 byte(s). |
| - */ |
| - static void encodeExcValues(IntBuffer intBuffer, int[] values, int num, int perbytes, int byteOffset) { |
| - if (num == 0) |
| - return; |
| - if (perbytes == 1) { |
| - int curBytePos = byteOffset; |
| - for (int i=0; i<num; ++i) { |
| - int curIntPos = curBytePos / 4; |
| - setBufferIntBits(intBuffer, curIntPos, (curBytePos & 3)*8, 8, values[i]); |
| - curBytePos++; |
| - } |
| - } else if (perbytes == 2) { |
| - int shortOffset = (byteOffset+1)/2; |
| - int curIntPos = shortOffset/2; |
| - int i=0; |
| - if ((shortOffset & 1) == 1) { // cut head to ensure remaining fit ints |
| - setBufferIntBits(intBuffer, curIntPos++, 16, 16, values[i++]); |
| - } |
| - for (; i<num-1; i+=2) { |
| - intBuffer.put(curIntPos++, (values[i+1]<<16) | values[i]); |
| - } |
| - if (i<num) { |
| - intBuffer.put(curIntPos, values[i]); // cut tail, also clear high 16 bits |
| - } |
| - } else if (perbytes == 4) { |
| - int curIntPos = (byteOffset+3) / 4; |
| - for (int i=0; i<num; ++i) { |
| - intBuffer.put(curIntPos++, values[i]); |
| - } |
| - } |
| - } |
| - |
| - /** |
| - * Save only header when the whole block equals to 1 |
| - */ |
| - static int compressDuplicateBlock(final int[] data, IntBuffer intBuffer) { |
| - intBuffer.put(0,data[0]); |
| - return getHeader(1, 0, 0, -1, 0); |
| - } |
| - |
| - /** |
| - * Decode exception values base on the exception pointers in normal area, |
| - * and values in exception area. |
| - * As for current implementation, numInts is hardwired as 128, so the |
| - * tail of normal area is naturally aligned to 32 bits, and we don't need to |
| - * rewind intBuffer here. |
| - * However, the normal area may share a same int with exception area, |
| - * when numFrameBits * numInts % 32 != 0, |
| - * In this case we should preprocess patch several heading exceptions, |
| - * before calling this method. |
| - * |
| - */ |
| - public static void patchException(IntBuffer intBuffer, int[] data, int excNum, int excFirstPos, int excBytes) { |
| - if (excFirstPos == -1) { |
| - return; |
| - } |
| - int curPos=excFirstPos; |
| - int i,j; |
| - |
| - if (excBytes == 1) { // each exception consumes 1 byte |
| - for (i=0; i+3<excNum; i+=4) { |
| - final int curInt = intBuffer.get(); |
| - curPos = patch(data, curPos, (curInt) & MASK[8]); |
| - curPos = patch(data, curPos, (curInt >>> 8) & MASK[8]); |
| - curPos = patch(data, curPos, (curInt >>> 16) & MASK[8]); |
| - curPos = patch(data, curPos, (curInt >>> 24) & MASK[8]); |
| - } |
| - if (i<excNum) { |
| - final int curInt = intBuffer.get(); |
| - for (j=0; j<32 && i<excNum; j+=8,i++) { |
| - curPos = patch(data, curPos, (curInt >>> j) & MASK[8]); |
| - } |
| - } |
| - } else if (excBytes == 2) { // each exception consumes 2 bytes |
| - for (i=0; i+1<excNum; i+=2) { |
| - final int curInt = intBuffer.get(); |
| - curPos = patch(data, curPos, (curInt) & MASK[16]); |
| - curPos = patch(data, curPos, (curInt >>> 16) & MASK[16]); |
| - } |
| - if (i<excNum) { |
| - final int curInt = intBuffer.get(); |
| - curPos = patch(data, curPos, (curInt) & MASK[16]); |
| - } |
| - } else if (excBytes == 4) { // each exception consumes 4 bytes |
| - for (i=0; i<excNum; i++) { |
| - curPos = patch(data, curPos, intBuffer.get()); |
| - } |
| - } |
| - } |
| - |
| - static int patch(int[]data, int pos, int value) { |
| - int nextPos = data[pos] + pos + 1; |
| - data[pos] = value; |
| - assert nextPos > pos; |
| - return nextPos; |
| - } |
| - |
| - /** |
| - * Estimate best number of frame bits according to minimum compressed size. |
| - * It will run 32 times. |
| - */ |
| - static int getNumBits(final int[] data) { |
| - if (isAllEqual(data)) { |
| - return 0; |
| - } |
| - int optBits=1; |
| - int optSize=estimateCompressedSize(data,optBits); |
| - for (int i=2; i<=32; ++i) { |
| - int curSize=estimateCompressedSize(data,i); |
| - if (curSize<optSize) { |
| - optSize=curSize; |
| - optBits=i; |
| - } |
| - } |
| - return optBits; |
| - } |
| - |
| - /** |
| - * Iterate the whole block to get maximum exception bits, |
| - * and estimate compressed size without forced exception. |
| - * TODO: foresee forced exception for better estimation |
| - */ |
| - static int estimateCompressedSize(final int[] data, int numBits) { |
| - int size=data.length; |
| - int totalBytes=(numBits*size+7)/8; // always round to byte |
| - int excNum=0; |
| - int curExcBytes=1; |
| - for (int i=0; i<size; ++i) { |
| - if ((data[i] & ~MASK[numBits]) != 0) { // exception |
| - excNum++; |
| - if (curExcBytes<2 && (data[i] & ~MASK[8]) != 0) { // exceed 1 byte exception |
| - curExcBytes=2; |
| - } |
| - if (curExcBytes<4 && (data[i] & ~MASK[16]) != 0) { // exceed 2 byte exception |
| - curExcBytes=4; |
| - } |
| - } |
| - } |
| - if (curExcBytes==2) { |
| - totalBytes=((totalBytes+1)/2)*2; // round up to 2x bytes before filling exceptions |
| - } |
| - else if (curExcBytes==4) { |
| - totalBytes=((totalBytes+3)/4)*4; // round up to 4x bytes |
| - } |
| - totalBytes+=excNum*curExcBytes; |
| - |
| - return totalBytes/4*4; // round up to ints |
| - } |
| - |
| - /** |
| - * Generate the 4 byte header which contains (from lsb to msb): |
| - * |
| - * 8 bits for encoded block int size (excluding header, this limits DEFAULT_BLOCK_SIZE <= 2^(8-1)) |
| - * |
| - * 8 bits for exception num - 1 (when no exceptions, this is undefined) |
| - * |
| - * 8 bits for the index of the first exception + 1 (when no exception, this is 0) |
| - * |
| - * 6 bits for num of frame bits (when 0, values in this block are all the same) |
| - * 2 bits for the exception code: 00: byte, 01: short, 10: int |
| - * |
| - */ |
| - static int getHeader(int encodedSize, int numBits, int excNum, int excFirstPos, int excBytes) { |
| - return (encodedSize) |
| - | (((excNum-1) & MASK[8]) << 8) |
| - | ((excFirstPos+1) << 16) |
| - | ((numBits) << 24) |
| - | ((excBytes/2) << 30); |
| - } |
| - |
| - |
| - /** |
| - * Expert: get metadata from header. |
| - */ |
| - public static int getEncodedSize(int header) { |
| - return ((header & MASK[8]))*4; |
| - } |
| - public static int getExcNum(int header) { |
| - return ((header >> 8) & MASK[8]) + 1; |
| - } |
| - public static int getFirstPos(int header) { |
| - return ((header >> 16) & MASK[8]) - 1; |
| - } |
| - public static int getExcBytes(int header) { |
| - return PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]]; |
| - } |
| - public static int getNumBits(int header) { |
| - return ((header >> 24) & MASK[6]); |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java (revision 1367168) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java (working copy) |
| @@ -17,6 +17,8 @@ |
| */ |
| |
| import java.nio.IntBuffer; |
| +import java.nio.LongBuffer; |
| +import java.util.Arrays; |
| |
| /** |
| * Encode all values in normal area with fixed bit width, |
| @@ -38,36 +40,31 @@ |
| * @param intBuffer integer buffer to hold compressed data |
| * @return encoded block byte size |
| */ |
| - public static int compress(final int[] data, IntBuffer intBuffer) { |
| - int numBits=getNumBits(data); |
| - if (numBits == 0) { |
| - return compressDuplicateBlock(data,intBuffer); |
| + public static int compress(LongBuffer data, IntBuffer intBuffer) { |
| + int numBits=getNumBits(data.array()); |
| + |
| + |
| + if (numBits == 0) { // when block is equal, save the value once |
| + intBuffer.put(0, (int)data.get(0)); |
| + return (getHeader(1,numBits)); |
| } |
| |
| - int size=data.length; |
| + int size=128; |
| int encodedSize = (size*numBits+31)/32; |
| |
| for (int i=0; i<size; ++i) { |
| - encodeNormalValue(intBuffer,i,data[i], numBits); |
| + encodeNormalValue(intBuffer,i,(int)data.get(i), numBits); |
| } |
| |
| return getHeader(encodedSize, numBits); |
| } |
| |
| - /** |
| - * Save only one int when the whole block equals to 1 |
| - */ |
| - static int compressDuplicateBlock(final int[] data, IntBuffer intBuffer) { |
| - intBuffer.put(0,data[0]); |
| - return getHeader(1, 0); |
| - } |
| - |
| /** Decompress given Integer buffer into int array. |
| * |
| * @param intBuffer integer buffer to hold compressed data |
| * @param data int array to hold uncompressed data |
| */ |
| - public static void decompress(IntBuffer intBuffer, int[] data, int header) { |
| + public static void decompress(LongBuffer data, IntBuffer intBuffer, int header) { |
| // since this buffer is reused at upper level, rewind first |
| intBuffer.rewind(); |
| |
| @@ -76,7 +73,13 @@ |
| |
| int numBits = ((header >> 8) & MASK[6]); |
| |
| - decompressCore(intBuffer, data, numBits); |
| + |
| + if (numBits == 0) { |
| + Arrays.fill(data.array(), intBuffer.get(0)); |
| + return; |
| + } |
| + |
| + decompressCore(intBuffer, data.array(), numBits); |
| } |
| |
| /** |
| @@ -84,13 +87,12 @@ |
| * caller should ensure that the position is set to the first |
| * encoded int before decoding. |
| */ |
| - static void decompressCore(IntBuffer intBuffer, int[] data, int numBits) { |
| + static void decompressCore(IntBuffer intBuffer, long[] data, int numBits) { |
| assert numBits<=32; |
| - assert numBits>=0; |
| + assert numBits>0; |
| |
| // TODO: PackedIntsDecompress is hardewired to size==128 only |
| switch(numBits) { |
| - case 0: PackedIntsDecompress.decode0(intBuffer, data); break; |
| case 1: PackedIntsDecompress.decode1(intBuffer, data); break; |
| case 2: PackedIntsDecompress.decode2(intBuffer, data); break; |
| case 3: PackedIntsDecompress.decode3(intBuffer, data); break; |
| @@ -149,7 +151,8 @@ |
| /** |
| * Estimate best num of frame bits according to the largest value. |
| */ |
| - static int getNumBits(final int[] data) { |
| + |
| + static int getNumBits(final long[] data) { |
| if (isAllEqual(data)) { |
| return 0; |
| } |
| @@ -163,9 +166,9 @@ |
| return optBits; |
| } |
| |
| - protected static boolean isAllEqual(final int[] data) { |
| + protected static boolean isAllEqual(final long[] data) { |
| int len = data.length; |
| - int v = data[0]; |
| + long v = data[0]; |
| for (int i=1; i<len; i++) { |
| if (data[i] != v) { |
| return false; |
| @@ -173,23 +176,10 @@ |
| } |
| return true; |
| } |
| - |
| - /** |
| - * Generate the 4 byte header, which contains (from lsb to msb): |
| - * |
| - * 8 bits for encoded block int size (excluded header, this limits DEFAULT_BLOCK_SIZE <= 2^8) |
| - * 6 bits for num of frame bits (when 0, values in this block are all the same) |
| - * other bits unused |
| - * |
| - */ |
| static int getHeader(int encodedSize, int numBits) { |
| return (encodedSize) |
| | ((numBits) << 8); |
| } |
| - |
| - /** |
| - * Expert: get metadata from header. |
| - */ |
| public static int getEncodedSize(int header) { |
| return ((header & MASK[8]))*4; |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py (revision 1367168) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/pfor/gendecompress.py (working copy) |
| @@ -82,7 +82,7 @@ |
| w(' }\n') |
| |
| for numFrameBits in xrange(1, 33): |
| - w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output) {\n' % numFrameBits) |
| + w(' public static void decode%d(final IntBuffer compressedBuffer, final long[] output) {\n' % numFrameBits) |
| w(' final int numFrameBits = %d;\n' % numFrameBits) |
| w(' final int mask = (int) ((1L<<numFrameBits) - 1);\n') |
| w(' int outputOffset = 0;\n') |
| Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java (revision 1367168) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java (working copy) |
| @@ -1,115 +0,0 @@ |
| -package org.apache.lucene.codecs.pfor; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Set; |
| - |
| -import org.apache.lucene.codecs.BlockTreeTermsReader; |
| -import org.apache.lucene.codecs.BlockTreeTermsWriter; |
| -import org.apache.lucene.codecs.FieldsConsumer; |
| -import org.apache.lucene.codecs.FieldsProducer; |
| -import org.apache.lucene.codecs.FixedGapTermsIndexReader; |
| -import org.apache.lucene.codecs.FixedGapTermsIndexWriter; |
| -import org.apache.lucene.codecs.PostingsFormat; |
| -import org.apache.lucene.codecs.PostingsReaderBase; |
| -import org.apache.lucene.codecs.PostingsWriterBase; |
| -import org.apache.lucene.codecs.TermsIndexReaderBase; |
| -import org.apache.lucene.codecs.TermsIndexWriterBase; |
| -import org.apache.lucene.codecs.sep.SepPostingsReader; |
| -import org.apache.lucene.codecs.sep.SepPostingsWriter; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.index.SegmentReadState; |
| -import org.apache.lucene.index.SegmentWriteState; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.IOUtils; |
| - |
| -/** |
| - * Pass PForFactory to a PostingsWriter/ReaderBase, and get |
| - * customized postings format plugged. |
| - */ |
| -public final class PForPostingsFormat extends PostingsFormat { |
| - private final int minBlockSize; |
| - private final int maxBlockSize; |
| - public final static int DEFAULT_BLOCK_SIZE = 128; |
| - |
| - public PForPostingsFormat() { |
| - super("PFor"); |
| - this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE; |
| - this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE; |
| - } |
| - public PForPostingsFormat(int minBlockSize, int maxBlockSize) { |
| - super("PFor"); |
| - this.minBlockSize = minBlockSize; |
| - assert minBlockSize > 1; |
| - this.maxBlockSize = maxBlockSize; |
| - assert minBlockSize <= maxBlockSize; |
| - } |
| - |
| - @Override |
| - public String toString() { |
| - return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE+ ")"; |
| - } |
| - |
| - @Override |
| - public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { |
| - // TODO: implement a new PostingsWriterBase to improve skip-settings |
| - PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new PForFactory()); |
| - boolean success = false; |
| - try { |
| - FieldsConsumer ret = new BlockTreeTermsWriter(state, |
| - postingsWriter, |
| - minBlockSize, |
| - maxBlockSize); |
| - success = true; |
| - return ret; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(postingsWriter); |
| - } |
| - } |
| - } |
| - |
| - @Override |
| - public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { |
| - PostingsReaderBase postingsReader = new SepPostingsReader(state.dir, |
| - state.fieldInfos, |
| - state.segmentInfo, |
| - state.context, |
| - new PForFactory(), |
| - state.segmentSuffix); |
| - |
| - boolean success = false; |
| - try { |
| - FieldsProducer ret = new BlockTreeTermsReader(state.dir, |
| - state.fieldInfos, |
| - state.segmentInfo.name, |
| - postingsReader, |
| - state.context, |
| - state.segmentSuffix, |
| - state.termsIndexDivisor); |
| - success = true; |
| - return ret; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(postingsReader); |
| - } |
| - } |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java (revision 1367168) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java (working copy) |
| @@ -1,116 +0,0 @@ |
| -package org.apache.lucene.codecs.pfor; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Set; |
| - |
| -import org.apache.lucene.codecs.BlockTreeTermsReader; |
| -import org.apache.lucene.codecs.BlockTreeTermsWriter; |
| -import org.apache.lucene.codecs.FieldsConsumer; |
| -import org.apache.lucene.codecs.FieldsProducer; |
| -import org.apache.lucene.codecs.FixedGapTermsIndexReader; |
| -import org.apache.lucene.codecs.FixedGapTermsIndexWriter; |
| -import org.apache.lucene.codecs.PostingsFormat; |
| -import org.apache.lucene.codecs.PostingsReaderBase; |
| -import org.apache.lucene.codecs.PostingsWriterBase; |
| -import org.apache.lucene.codecs.TermsIndexReaderBase; |
| -import org.apache.lucene.codecs.TermsIndexWriterBase; |
| -import org.apache.lucene.codecs.sep.SepPostingsReader; |
| -import org.apache.lucene.codecs.sep.SepPostingsWriter; |
| -import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.index.SegmentReadState; |
| -import org.apache.lucene.index.SegmentWriteState; |
| -import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.IOUtils; |
| - |
| -/** |
| - * Pass ForFactory to a PostingsWriter/ReaderBase, and get |
| - * customized postings format plugged. |
| - */ |
| -public final class ForPostingsFormat extends PostingsFormat { |
| - private final int minBlockSize; |
| - private final int maxBlockSize; |
| - public final static int DEFAULT_BLOCK_SIZE = 128; |
| - |
| - public ForPostingsFormat() { |
| - super("For"); |
| - this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE; |
| - this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE; |
| - } |
| - |
| - public ForPostingsFormat(int minBlockSize, int maxBlockSize) { |
| - super("For"); |
| - this.minBlockSize = minBlockSize; |
| - assert minBlockSize > 1; |
| - this.maxBlockSize = maxBlockSize; |
| - assert minBlockSize <= maxBlockSize; |
| - } |
| - |
| - @Override |
| - public String toString() { |
| - return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE + ")"; |
| - } |
| - |
| - @Override |
| - public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { |
| - // TODO: implement a new PostingsWriterBase to improve skip-settings |
| - PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new ForFactory()); |
| - boolean success = false; |
| - try { |
| - FieldsConsumer ret = new BlockTreeTermsWriter(state, |
| - postingsWriter, |
| - minBlockSize, |
| - maxBlockSize); |
| - success = true; |
| - return ret; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(postingsWriter); |
| - } |
| - } |
| - } |
| - |
| - @Override |
| - public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { |
| - PostingsReaderBase postingsReader = new SepPostingsReader(state.dir, |
| - state.fieldInfos, |
| - state.segmentInfo, |
| - state.context, |
| - new ForFactory(), |
| - state.segmentSuffix); |
| - |
| - boolean success = false; |
| - try { |
| - FieldsProducer ret = new BlockTreeTermsReader(state.dir, |
| - state.fieldInfos, |
| - state.segmentInfo.name, |
| - postingsReader, |
| - state.context, |
| - state.segmentSuffix, |
| - state.termsIndexDivisor); |
| - success = true; |
| - return ret; |
| - } finally { |
| - if (!success) { |
| - IOUtils.closeWhileHandlingException(postingsReader); |
| - } |
| - } |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java (revision 1367168) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/pfor/PackedIntsDecompress.java (working copy) |
| @@ -30,7 +30,7 @@ |
| public static void decode0(final IntBuffer compressedBuffer, final int[] output) { |
| Arrays.fill(output, compressedBuffer.get()); |
| } |
| - public static void decode1(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode1(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 1; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -71,7 +71,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode2(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode2(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 2; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -113,7 +113,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode3(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode3(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 3; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -156,7 +156,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode4(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode4(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 4; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -200,7 +200,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode5(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode5(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 5; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -245,7 +245,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode6(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode6(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 6; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -291,7 +291,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode7(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode7(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 7; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -338,7 +338,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode8(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode8(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 8; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -386,7 +386,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode9(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode9(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 9; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -435,7 +435,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode10(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode10(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 10; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -485,7 +485,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode11(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode11(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 11; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -536,7 +536,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode12(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode12(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 12; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -588,7 +588,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode13(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode13(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 13; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -641,7 +641,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode14(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode14(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 14; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -695,7 +695,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode15(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode15(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 15; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -750,7 +750,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode16(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode16(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 16; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -806,7 +806,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode17(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode17(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 17; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -863,7 +863,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode18(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode18(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 18; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -921,7 +921,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode19(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode19(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 19; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -980,7 +980,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode20(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode20(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 20; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1040,7 +1040,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode21(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode21(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 21; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1101,7 +1101,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode22(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode22(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 22; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1163,7 +1163,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode23(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode23(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 23; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1226,7 +1226,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode24(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode24(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 24; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1290,7 +1290,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode25(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode25(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 25; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1355,7 +1355,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode26(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode26(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 26; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1421,7 +1421,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode27(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode27(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 27; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1488,7 +1488,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode28(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode28(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 28; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1556,7 +1556,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode29(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode29(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 29; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1625,7 +1625,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode30(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode30(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 30; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1695,7 +1695,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode31(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode31(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 31; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| @@ -1766,7 +1766,7 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode32(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode32(final IntBuffer compressedBuffer, final long[] output) { |
| final int numFrameBits = 32; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| Index: lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat |
| =================================================================== |
| --- lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (revision 1367168) |
| +++ lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (working copy) |
| @@ -17,8 +17,6 @@ |
| org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat |
| org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat |
| org.apache.lucene.codecs.memory.MemoryPostingsFormat |
| -org.apache.lucene.codecs.pfor.ForPostingsFormat |
| -org.apache.lucene.codecs.pfor.PForPostingsFormat |
| org.apache.lucene.codecs.bulkvint.BulkVIntPostingsFormat |
| org.apache.lucene.codecs.block.BlockPostingsFormat |
| org.apache.lucene.codecs.memory.DirectPostingsFormat |