| Index: lucene/src/java/org/apache/lucene/store/MMapDirectory.java
|
| ===================================================================
|
| --- lucene/src/java/org/apache/lucene/store/MMapDirectory.java (revision 1135395)
|
| +++ lucene/src/java/org/apache/lucene/store/MMapDirectory.java (working copy)
|
| @@ -79,8 +79,8 @@
|
| */ |
| public class MMapDirectory extends FSDirectory { |
| private boolean useUnmapHack = UNMAP_SUPPORTED; |
| - public static final int DEFAULT_MAX_BUFF = Constants.JRE_IS_64BIT ? Integer.MAX_VALUE : (256 * 1024 * 1024); |
| - private int maxBBuf = DEFAULT_MAX_BUFF; |
| + public static final int DEFAULT_MAX_BUFF = Constants.JRE_IS_64BIT ? (1 << 30) : (1 << 28); |
| + private int chunkSizePower; |
| |
| /** Create a new MMapDirectory for the named location. |
| * |
| @@ -91,6 +91,7 @@
|
| */ |
| public MMapDirectory(File path, LockFactory lockFactory) throws IOException { |
| super(path, lockFactory); |
| + setMaxChunkSize(DEFAULT_MAX_BUFF); |
| } |
| |
| /** Create a new MMapDirectory for the named location and {@link NativeFSLockFactory}. |
| @@ -100,6 +101,7 @@
|
| */ |
| public MMapDirectory(File path) throws IOException { |
| super(path, null); |
| + setMaxChunkSize(DEFAULT_MAX_BUFF); |
| } |
| |
| /** |
| @@ -180,23 +182,28 @@
|
| * Especially on 32 bit platform, the address space can be very fragmented, |
| * so large index files cannot be mapped. |
| * Using a lower chunk size makes the directory implementation a little |
| - * bit slower (as the correct chunk must be resolved on each seek) |
| + * bit slower (as the correct chunk may be resolved on lots of seeks) |
| * but the chance is higher that mmap does not fail. On 64 bit |
| - * Java platforms, this parameter should always be {@link Integer#MAX_VALUE}, |
| + * Java platforms, this parameter should always be {@code 1 << 30}, |
| * as the address space is big enough. |
| + * <b>Please note:</b> This method always rounds down the chunk size |
| + * to a power of 2. |
| */ |
| - public void setMaxChunkSize(final int maxBBuf) { |
| - if (maxBBuf<=0) |
| + public final void setMaxChunkSize(final int maxChunkSize) { |
| + if (maxChunkSize <= 0) |
| throw new IllegalArgumentException("Maximum chunk size for mmap must be >0"); |
| - this.maxBBuf=maxBBuf; |
| + //System.out.println("Requested chunk size: "+maxChunkSize); |
| + this.chunkSizePower = 31 - Integer.numberOfLeadingZeros(maxChunkSize); |
| + assert this.chunkSizePower >=0 && this.chunkSizePower <=30; |
| + //System.out.println("Got chunk size: "+getMaxChunkSize()); |
| } |
| |
| /** |
| * Returns the current mmap chunk size. |
| * @see #setMaxChunkSize |
| */ |
| - public int getMaxChunkSize() { |
| - return maxBBuf; |
| + public final int getMaxChunkSize() { |
| + return 1 << chunkSizePower; |
| } |
| |
| /** Creates an IndexInput for the file with the given name. */ |
| @@ -206,149 +213,55 @@
|
| File f = new File(getDirectory(), name); |
| RandomAccessFile raf = new RandomAccessFile(f, "r"); |
| try { |
| - return (raf.length() <= maxBBuf) |
| - ? (IndexInput) new MMapIndexInput(raf) |
| - : (IndexInput) new MultiMMapIndexInput(raf, maxBBuf); |
| + return new MMapIndexInput(raf, chunkSizePower); |
| } finally { |
| raf.close(); |
| } |
| } |
| |
| - private class MMapIndexInput extends IndexInput { |
| - |
| - private ByteBuffer buffer; |
| - private final long length; |
| - private boolean isClone = false; |
| - |
| - private MMapIndexInput(RandomAccessFile raf) throws IOException { |
| - this.length = raf.length(); |
| - this.buffer = raf.getChannel().map(MapMode.READ_ONLY, 0, length); |
| - } |
| - |
| - @Override |
| - public byte readByte() throws IOException { |
| - try { |
| - return buffer.get(); |
| - } catch (BufferUnderflowException e) { |
| - throw new IOException("read past EOF"); |
| - } |
| - } |
| - |
| - @Override |
| - public void readBytes(byte[] b, int offset, int len) throws IOException { |
| - try { |
| - buffer.get(b, offset, len); |
| - } catch (BufferUnderflowException e) { |
| - throw new IOException("read past EOF"); |
| - } |
| - } |
| - |
| - @Override |
| - public short readShort() throws IOException { |
| - try { |
| - return buffer.getShort(); |
| - } catch (BufferUnderflowException e) { |
| - throw new IOException("read past EOF"); |
| - } |
| - } |
| - |
| - @Override |
| - public int readInt() throws IOException { |
| - try { |
| - return buffer.getInt(); |
| - } catch (BufferUnderflowException e) { |
| - throw new IOException("read past EOF"); |
| - } |
| - } |
| - |
| - @Override |
| - public long readLong() throws IOException { |
| - try { |
| - return buffer.getLong(); |
| - } catch (BufferUnderflowException e) { |
| - throw new IOException("read past EOF"); |
| - } |
| - } |
| - |
| - @Override |
| - public long getFilePointer() { |
| - return buffer.position(); |
| - } |
| - |
| - @Override |
| - public void seek(long pos) throws IOException { |
| - buffer.position((int)pos); |
| - } |
| - |
| - @Override |
| - public long length() { |
| - return length; |
| - } |
| - |
| - @Override |
| - public Object clone() { |
| - if (buffer == null) |
| - throw new AlreadyClosedException("MMapIndexInput already closed"); |
| - MMapIndexInput clone = (MMapIndexInput)super.clone(); |
| - clone.isClone = true; |
| - clone.buffer = buffer.duplicate(); |
| - return clone; |
| - } |
| - |
| - @Override |
| - public void close() throws IOException { |
| - // unmap the buffer (if enabled) and at least unset it for GC |
| - try { |
| - if (isClone || buffer == null) return; |
| - cleanMapping(buffer); |
| - } finally { |
| - buffer = null; |
| - } |
| - } |
| - } |
| - |
| // Because Java's ByteBuffer uses an int to address the |
| // values, it's necessary to access a file > |
| // Integer.MAX_VALUE in size using multiple byte buffers. |
| - private class MultiMMapIndexInput extends IndexInput { |
| + private final class MMapIndexInput extends IndexInput { |
| |
| private ByteBuffer[] buffers; |
| |
| - private final long length; |
| + private final long length, chunkSizeMask, chunkSize; |
| + private final int chunkSizePower; |
| |
| private int curBufIndex; |
| - private final int maxBufSize; |
| |
| private ByteBuffer curBuf; // redundant for speed: buffers[curBufIndex] |
| |
| private boolean isClone = false; |
| |
| - public MultiMMapIndexInput(RandomAccessFile raf, int maxBufSize) |
| - throws IOException { |
| + MMapIndexInput(RandomAccessFile raf, int chunkSizePower) throws IOException { |
| this.length = raf.length(); |
| - this.maxBufSize = maxBufSize; |
| + this.chunkSizePower = chunkSizePower; |
| + this.chunkSize = 1L << chunkSizePower; |
| + this.chunkSizeMask = chunkSize - 1L; |
| |
| - if (maxBufSize <= 0) |
| - throw new IllegalArgumentException("Non positive maxBufSize: " |
| - + maxBufSize); |
| + if (chunkSizePower < 0 || chunkSizePower > 30) |
| + throw new IllegalArgumentException("Invalid chunkSizePower used for ByteBuffer size: " + chunkSizePower); |
| |
| - if ((length / maxBufSize) > Integer.MAX_VALUE) |
| - throw new IllegalArgumentException |
| - ("RandomAccessFile too big for maximum buffer size: " |
| - + raf.toString()); |
| + if ((length >>> chunkSizePower) >= Integer.MAX_VALUE) |
| + throw new IllegalArgumentException("RandomAccessFile too big for chunk size: " + raf.toString()); |
| |
| - int nrBuffers = (int) (length / maxBufSize); |
| - if (((long) nrBuffers * maxBufSize) <= length) nrBuffers++; |
| + // we always allocate one more buffer, the last one may be a 0 byte one |
| + final int nrBuffers = (int) (length >>> chunkSizePower) + 1; |
| |
| + //System.out.println("length="+length+", chunkSizePower=" + chunkSizePower + ", chunkSizeMask=" + chunkSizeMask + ", nrBuffers=" + nrBuffers); |
| + |
| this.buffers = new ByteBuffer[nrBuffers]; |
| |
| - long bufferStart = 0; |
| + long bufferStart = 0L; |
| FileChannel rafc = raf.getChannel(); |
| for (int bufNr = 0; bufNr < nrBuffers; bufNr++) { |
| - int bufSize = (length > (bufferStart + maxBufSize)) |
| - ? maxBufSize |
| - : (int) (length - bufferStart); |
| - this.buffers[bufNr] = rafc.map(MapMode.READ_ONLY,bufferStart,bufSize); |
| + int bufSize = (int) ( (length > (bufferStart + chunkSize)) |
| + ? chunkSize |
| + : (length - bufferStart) |
| + ); |
| + this.buffers[bufNr] = rafc.map(MapMode.READ_ONLY, bufferStart, bufSize); |
| bufferStart += bufSize; |
| } |
| seek(0L); |
| @@ -359,11 +272,13 @@
|
| try { |
| return curBuf.get(); |
| } catch (BufferUnderflowException e) { |
| - curBufIndex++; |
| - if (curBufIndex >= buffers.length) |
| - throw new IOException("read past EOF"); |
| - curBuf = buffers[curBufIndex]; |
| - curBuf.position(0); |
| + do { |
| + curBufIndex++; |
| + if (curBufIndex >= buffers.length) |
| + throw new IOException("read past EOF"); |
| + curBuf = buffers[curBufIndex]; |
| + curBuf.position(0); |
| + } while (!curBuf.hasRemaining()); |
| return curBuf.get(); |
| } |
| } |
| @@ -418,15 +333,28 @@
|
| |
| @Override |
| public long getFilePointer() { |
| - return ((long) curBufIndex * maxBufSize) + curBuf.position(); |
| + return (((long) curBufIndex) << chunkSizePower) + curBuf.position(); |
| } |
| |
| @Override |
| public void seek(long pos) throws IOException { |
| - curBufIndex = (int) (pos / maxBufSize); |
| - curBuf = buffers[curBufIndex]; |
| - int bufOffset = (int) (pos - ((long) curBufIndex * maxBufSize)); |
| - curBuf.position(bufOffset); |
| + // we use >> here to preserve negative, so we will catch AIOOBE: |
| + final int bi = (int) (pos >> chunkSizePower); |
| + try { |
| + final ByteBuffer b = buffers[bi]; |
| + b.position((int) (pos & chunkSizeMask)); |
| + // write values, on exception all is unchanged |
| + this.curBufIndex = bi; |
| + this.curBuf = b; |
| + } catch (ArrayIndexOutOfBoundsException aioobe) { |
| + if (pos < 0L) |
| + throw new IllegalArgumentException("Seeking to negative position"); |
| + throw new IOException("seek past EOF"); |
| + } catch (IllegalArgumentException iae) { |
| + if (pos < 0L) |
| + throw new IllegalArgumentException("Seeking to negative position"); |
| + throw new IOException("seek past EOF"); |
| + } |
| } |
| |
| @Override |
| @@ -437,8 +365,8 @@
|
| @Override |
| public Object clone() { |
| if (buffers == null) |
| - throw new AlreadyClosedException("MultiMMapIndexInput already closed"); |
| - MultiMMapIndexInput clone = (MultiMMapIndexInput)super.clone(); |
| + throw new AlreadyClosedException("MMapIndexInput already closed"); |
| + final MMapIndexInput clone = (MMapIndexInput)super.clone(); |
| clone.isClone = true; |
| clone.buffers = new ByteBuffer[buffers.length]; |
| // Since most clones will use only one buffer, duplicate() could also be |
| @@ -449,9 +377,7 @@
|
| try { |
| clone.seek(getFilePointer()); |
| } catch(IOException ioe) { |
| - RuntimeException newException = new RuntimeException(ioe); |
| - newException.initCause(ioe); |
| - throw newException; |
| + throw new RuntimeException("Should never happen", ioe); |
| } |
| return clone; |
| } |
| Index: lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
|
| ===================================================================
|
| --- lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (revision 1135395)
|
| +++ lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (working copy)
|
| @@ -390,8 +390,6 @@
|
| fail(); |
| } catch (IOException e) { |
| // expected exception |
| - } catch (IllegalArgumentException e) { |
| - // mmapdir will give us this from java.nio.Buffer.position() |
| } finally { |
| reader.close(); |
| } |
| @@ -402,8 +400,6 @@
|
| fail(); |
| } catch (IOException e) { |
| // expected exception |
| - } catch (IllegalArgumentException e) { |
| - // mmapdir will give us this from java.nio.Buffer.position() |
| } finally { |
| reader.close(); |
| } |
| Index: lucene/src/test/org/apache/lucene/store/TestMultiMMap.java
|
| ===================================================================
|
| --- lucene/src/test/org/apache/lucene/store/TestMultiMMap.java (revision 1135395)
|
| +++ lucene/src/test/org/apache/lucene/store/TestMultiMMap.java (working copy)
|
| @@ -25,6 +25,7 @@
|
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.RandomIndexWriter; |
| +import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util._TestUtil; |
| |
| @@ -40,11 +41,70 @@
|
| |
| @Override |
| public void setUp() throws Exception { |
| - super.setUp(); |
| - workDir = _TestUtil.getTempDir("TestMultiMMap"); |
| - workDir.mkdirs(); |
| + super.setUp(); |
| + assumeTrue("test requires a jre that supports unmapping", MMapDirectory.UNMAP_SUPPORTED); |
| + workDir = _TestUtil.getTempDir("TestMultiMMap"); |
| + workDir.mkdirs(); |
| } |
| + |
| + public void testSeekZero() throws Exception { |
| + for (int i = 0; i < 31; i++) { |
| + MMapDirectory mmapDir = new MMapDirectory(_TestUtil.getTempDir("testSeekZero")); |
| + mmapDir.setMaxChunkSize(1<<i); |
| + IndexOutput io = mmapDir.createOutput("zeroBytes"); |
| + io.close(); |
| + IndexInput ii = mmapDir.openInput("zeroBytes"); |
| + ii.seek(0L); |
| + ii.close(); |
| + mmapDir.close(); |
| + } |
| + } |
| |
| + public void testSeekEnd() throws Exception { |
| + for (int i = 0; i < 17; i++) { |
| + MMapDirectory mmapDir = new MMapDirectory(_TestUtil.getTempDir("testSeekEnd")); |
| + mmapDir.setMaxChunkSize(1<<i); |
| + IndexOutput io = mmapDir.createOutput("bytes"); |
| + byte bytes[] = new byte[1<<i]; |
| + random.nextBytes(bytes); |
| + io.writeBytes(bytes, bytes.length); |
| + io.close(); |
| + IndexInput ii = mmapDir.openInput("bytes"); |
| + byte actual[] = new byte[1<<i]; |
| + ii.readBytes(actual, 0, actual.length); |
| + assertEquals(new BytesRef(bytes), new BytesRef(actual)); |
| + ii.seek(1<<i); |
| + ii.close(); |
| + mmapDir.close(); |
| + } |
| + } |
| + |
| + public void testSeeking() throws Exception { |
| + for (int i = 0; i < 10; i++) { |
| + MMapDirectory mmapDir = new MMapDirectory(_TestUtil.getTempDir("testSeeking")); |
| + mmapDir.setMaxChunkSize(1<<i); |
| + IndexOutput io = mmapDir.createOutput("bytes"); |
| + byte bytes[] = new byte[1<<(i+1)]; // make sure we switch buffers |
| + random.nextBytes(bytes); |
| + io.writeBytes(bytes, bytes.length); |
| + io.close(); |
| + IndexInput ii = mmapDir.openInput("bytes"); |
| + byte actual[] = new byte[1<<(i+1)]; // first read all bytes |
| + ii.readBytes(actual, 0, actual.length); |
| + assertEquals(new BytesRef(bytes), new BytesRef(actual)); |
| + for (int sliceStart = 0; sliceStart < bytes.length; sliceStart++) { |
| + for (int sliceLength = 0; sliceLength < bytes.length - sliceStart; sliceLength++) { |
| + byte slice[] = new byte[sliceLength]; |
| + ii.seek(sliceStart); |
| + ii.readBytes(slice, 0, slice.length); |
| + assertEquals(new BytesRef(bytes, sliceStart, sliceLength), new BytesRef(slice)); |
| + } |
| + } |
| + ii.close(); |
| + mmapDir.close(); |
| + } |
| + } |
| + |
| public void testRandomChunkSizes() throws Exception { |
| int num = atLeast(10); |
| for (int i = 0; i < num; i++) |
| @@ -55,11 +115,12 @@
|
| File path = _TestUtil.createTempFile("mmap" + chunkSize, "tmp", workDir); |
| path.delete(); |
| path.mkdirs(); |
| - MMapDirectory dir = new MMapDirectory(path); |
| - dir.setMaxChunkSize(chunkSize); |
| + MMapDirectory mmapDir = new MMapDirectory(path); |
| + mmapDir.setMaxChunkSize(chunkSize); |
| // we will map a lot, try to turn on the unmap hack |
| if (MMapDirectory.UNMAP_SUPPORTED) |
| - dir.setUseUnmap(true); |
| + mmapDir.setUseUnmap(true); |
| + MockDirectoryWrapper dir = new MockDirectoryWrapper(random, mmapDir); |
| RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); |
| Document doc = new Document(); |
| Field docid = newField("docid", "0", Field.Store.YES, Field.Index.NOT_ANALYZED); |