blob: 6916ef1483453d1a14bdb67d93a61cc59e0cbaef [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
/**
* Represents a logical byte[] as a series of pages. You can write-once into the logical byte[]
* (append only), using copy, and then retrieve slices (BytesRef) into it using fill.
*
* @lucene.internal
*/
// TODO: refactor this, byteblockpool, fst.bytestore, and any
// other "shift/mask big arrays". there are too many of these classes!
public final class PagedBytes implements Accountable {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(PagedBytes.class);
private byte[][] blocks = new byte[16][];
private int numBlocks;
// TODO: these are unused?
private final int blockSize;
private final int blockBits;
private final int blockMask;
private boolean didSkipBytes;
private boolean frozen;
private int upto;
private byte[] currentBlock;
private final long bytesUsedPerBlock;
private static final byte[] EMPTY_BYTES = new byte[0];
/**
 * Provides methods to read BytesRefs from a frozen PagedBytes.
 *
 * @see #freeze
 */
public static final class Reader implements Accountable {
  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(Reader.class);
  private final byte[][] blocks;
  private final int blockBits;
  private final int blockMask;
  private final int blockSize;
  private final long bytesUsedPerBlock;

  private Reader(PagedBytes pagedBytes) {
    // Snapshot only the committed blocks; the Reader is immutable afterwards.
    this.blocks = ArrayUtil.copyOfSubArray(pagedBytes.blocks, 0, pagedBytes.numBlocks);
    this.blockBits = pagedBytes.blockBits;
    this.blockMask = pagedBytes.blockMask;
    this.blockSize = pagedBytes.blockSize;
    this.bytesUsedPerBlock = pagedBytes.bytesUsedPerBlock;
  }

  /**
   * Gets a slice out of {@link PagedBytes} starting at <i>start</i> with a given length. Iff the
   * slice spans across a block border this method will allocate sufficient resources and copy the
   * paged data.
   *
   * <p>Slices spanning more than two blocks are not supported.
   *
   * @lucene.internal
   */
  public void fillSlice(BytesRef b, long start, int length) {
    assert length >= 0 : "length=" + length;
    assert length <= blockSize + 1 : "length=" + length;
    b.length = length;
    if (length == 0) {
      return;
    }
    final int blockIndex = (int) (start >> blockBits);
    final int blockOffset = (int) (start & blockMask);
    final int firstChunk = blockSize - blockOffset;
    if (length <= firstChunk) {
      // Entirely inside one block: hand out a view into the page, no copying.
      b.bytes = blocks[blockIndex];
      b.offset = blockOffset;
    } else {
      // Straddles a block boundary: stitch both pieces into a fresh array.
      b.bytes = new byte[length];
      b.offset = 0;
      System.arraycopy(blocks[blockIndex], blockOffset, b.bytes, 0, firstChunk);
      System.arraycopy(blocks[blockIndex + 1], 0, b.bytes, firstChunk, length - firstChunk);
    }
  }

  /**
   * Reads length as 1 or 2 byte vInt prefix, starting at <i>start</i>.
   *
   * <p><b>Note:</b> this method does not support slices spanning across block borders.
   *
   * @lucene.internal
   */
  // TODO: this really needs to be refactored into fieldcacheimpl
  public void fill(BytesRef b, long start) {
    final int blockIndex = (int) (start >> blockBits);
    final int blockOffset = (int) (start & blockMask);
    final byte[] block = b.bytes = blocks[blockIndex];
    final byte firstByte = block[blockOffset];
    if ((firstByte & 128) == 0) {
      // High bit clear: single-byte length prefix.
      b.length = firstByte;
      b.offset = blockOffset + 1;
    } else {
      // High bit set: two-byte length prefix (big-endian, low 15 bits).
      b.length = ((firstByte & 0x7f) << 8) | (block[blockOffset + 1] & 0xff);
      b.offset = blockOffset + 2;
      assert b.length > 0;
    }
  }

  @Override
  public long ramBytesUsed() {
    long total = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks);
    if (blocks.length > 0) {
      // All blocks except the last share the fixed per-block footprint; the
      // final block may have been trimmed by freeze(true), so measure it.
      total += (blocks.length - 1) * bytesUsedPerBlock;
      total += RamUsageEstimator.sizeOf(blocks[blocks.length - 1]);
    }
    return total;
  }

  @Override
  public String toString() {
    return "PagedBytes(blocksize=" + blockSize + ")";
  }
}
/** 1&lt;&lt;blockBits must be bigger than biggest single BytesRef slice that will be pulled */
public PagedBytes(int blockBits) {
  assert blockBits > 0 && blockBits <= 31 : blockBits;
  this.blockBits = blockBits;
  this.blockSize = 1 << blockBits;
  this.blockMask = blockSize - 1;
  // Start "full" so the first write allocates the first block lazily.
  this.upto = blockSize;
  this.bytesUsedPerBlock =
      RamUsageEstimator.alignObjectSize(blockSize + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER);
  this.numBlocks = 0;
}
// Appends a completed block to the committed-blocks array, growing it as needed.
private void addBlock(byte[] block) {
  blocks = ArrayUtil.grow(blocks, numBlocks + 1);
  blocks[numBlocks] = block;
  numBlocks++;
}
/** Read this many bytes from in */
public void copy(IndexInput in, long byteCount) throws IOException {
  long remaining = byteCount;
  while (remaining > 0) {
    int left = blockSize - upto;
    if (left == 0) {
      // Current block is exhausted (or none exists yet): start a fresh one.
      if (currentBlock != null) {
        addBlock(currentBlock);
      }
      currentBlock = new byte[blockSize];
      upto = 0;
      left = blockSize;
    }
    if (remaining <= left) {
      // Everything that's left fits into the current block.
      in.readBytes(currentBlock, upto, (int) remaining, false);
      upto += remaining;
      break;
    } else {
      // Fill the remainder of this block, then loop for a new one.
      in.readBytes(currentBlock, upto, left, false);
      upto = blockSize;
      remaining -= left;
    }
  }
}
/**
 * Copy BytesRef in, setting BytesRef out to the result. Do not use this if you will use
 * freeze(true). This only supports bytes.length &lt;= blockSize
 */
public void copy(BytesRef bytes, BytesRef out) {
  if (currentBlock == null || bytes.length > blockSize - upto) {
    // Not enough room in the current block (or no block yet): roll over. The
    // skipped tail bytes are why freeze(true) is disallowed after this method.
    if (currentBlock != null) {
      addBlock(currentBlock);
      didSkipBytes = true;
    }
    currentBlock = new byte[blockSize];
    upto = 0;
    assert bytes.length <= blockSize;
    // TODO: we could also support variable block sizes
  }
  out.bytes = currentBlock;
  out.offset = upto;
  out.length = bytes.length;
  System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
  upto += bytes.length;
}
/** Commits final byte[], trimming it if necessary and if trim=true */
public Reader freeze(boolean trim) {
  if (frozen) {
    throw new IllegalStateException("already frozen");
  }
  if (didSkipBytes) {
    throw new IllegalStateException("cannot freeze when copy(BytesRef, BytesRef) was used");
  }
  if (trim && upto < blockSize) {
    // Shrink the final block to exactly the bytes written so far.
    final byte[] trimmed = new byte[upto];
    System.arraycopy(currentBlock, 0, trimmed, 0, upto);
    currentBlock = trimmed;
  }
  if (currentBlock == null) {
    // Nothing was ever written: commit an empty final block.
    currentBlock = EMPTY_BYTES;
  }
  addBlock(currentBlock);
  frozen = true;
  currentBlock = null;
  return new Reader(this);
}
// Returns the logical write position: bytes committed in full blocks plus
// bytes written into the current block; zero before any block is allocated.
public long getPointer() {
  return currentBlock == null ? 0 : (long) numBlocks * blockSize + upto;
}
/** Returns the approximate RAM usage of this object in bytes. */
@Override
public long ramBytesUsed() {
  // Fix: removed a stray empty statement (lone ";") left after the first line.
  long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks);
  if (numBlocks > 0) {
    // All committed blocks except the last are full-size; the last one may
    // have been trimmed by freeze(true), so measure it directly.
    size += (numBlocks - 1) * bytesUsedPerBlock;
    size += RamUsageEstimator.sizeOf(blocks[numBlocks - 1]);
  }
  if (currentBlock != null) {
    // The in-progress block is not yet in blocks[] — count it too.
    size += RamUsageEstimator.sizeOf(currentBlock);
  }
  return size;
}
/** Copy bytes in, writing the length as a 1 or 2 byte vInt prefix. */
// TODO: this really needs to be refactored into fieldcacheimpl!
public long copyUsingLengthPrefix(BytesRef bytes) {
  final int len = bytes.length;
  if (len >= 32768) {
    throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")");
  }
  if (upto + len + 2 > blockSize) {
    if (len + 2 > blockSize) {
      throw new IllegalArgumentException(
          "block size " + blockSize + " is too small to store length " + bytes.length + " bytes");
    }
    // Roll over to a fresh block so prefix + payload never straddle a border.
    if (currentBlock != null) {
      addBlock(currentBlock);
    }
    currentBlock = new byte[blockSize];
    upto = 0;
  }
  final long pointer = getPointer();
  if (len < 128) {
    // High bit clear: single-byte length prefix.
    currentBlock[upto++] = (byte) len;
  } else {
    // High bit set on the first byte marks a two-byte (big-endian) length.
    currentBlock[upto++] = (byte) (0x80 | (len >> 8));
    currentBlock[upto++] = (byte) (len & 0xff);
  }
  System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, len);
  upto += len;
  return pointer;
}
/**
 * Input that transparently iterates over pages
 *
 * @lucene.internal
 */
public final class PagedBytesDataInput extends DataInput {
  private int currentBlockIndex;
  private int currentBlockUpto;
  private byte[] currentBlock;

  PagedBytesDataInput() {
    currentBlock = blocks[0];
  }

  @Override
  public PagedBytesDataInput clone() {
    // A clone shares the frozen pages but carries its own read position.
    PagedBytesDataInput clone = getDataInput();
    clone.setPosition(getPosition());
    return clone;
  }

  /** Returns the current byte position. */
  public long getPosition() {
    return (long) currentBlockIndex * blockSize + currentBlockUpto;
  }

  /** Seek to a position previously obtained from {@link #getPosition}. */
  public void setPosition(long pos) {
    currentBlockIndex = (int) (pos >> blockBits);
    currentBlock = blocks[currentBlockIndex];
    currentBlockUpto = (int) (pos & blockMask);
  }

  @Override
  public byte readByte() {
    if (currentBlockUpto == blockSize) {
      nextBlock();
    }
    return currentBlock[currentBlockUpto++];
  }

  @Override
  public void readBytes(byte[] b, int offset, int len) {
    assert b.length >= offset + len;
    int pos = offset;
    int remaining = len;
    while (true) {
      final int blockLeft = blockSize - currentBlockUpto;
      if (remaining <= blockLeft) {
        // Final (possibly only) chunk fits entirely in the current block.
        System.arraycopy(currentBlock, currentBlockUpto, b, pos, remaining);
        currentBlockUpto += remaining;
        return;
      }
      // Drain the rest of this block, then advance to the next page.
      System.arraycopy(currentBlock, currentBlockUpto, b, pos, blockLeft);
      nextBlock();
      pos += blockLeft;
      remaining -= blockLeft;
    }
  }

  @Override
  public void skipBytes(long numBytes) throws IOException {
    // Delegates to DataInput's byte-at-a-time skip implementation.
    skipBytesSlowly(numBytes);
  }

  private void nextBlock() {
    currentBlockIndex++;
    currentBlockUpto = 0;
    currentBlock = blocks[currentBlockIndex];
  }
}
/**
 * Output that transparently spills to new pages as necessary
 *
 * @lucene.internal
 */
public final class PagedBytesDataOutput extends DataOutput {
  @Override
  public void writeByte(byte b) {
    if (upto == blockSize) {
      // Current page is full (or none allocated yet): start a new one.
      if (currentBlock != null) {
        addBlock(currentBlock);
      }
      currentBlock = new byte[blockSize];
      upto = 0;
    }
    currentBlock[upto++] = b;
  }

  @Override
  public void writeBytes(byte[] b, int offset, int length) {
    assert b.length >= offset + length;
    if (length == 0) {
      return;
    }
    if (upto == blockSize) {
      // Make sure there is a page with at least one free byte before looping.
      if (currentBlock != null) {
        addBlock(currentBlock);
      }
      currentBlock = new byte[blockSize];
      upto = 0;
    }
    int pos = offset;
    int remaining = length;
    while (true) {
      final int blockLeft = blockSize - upto;
      if (remaining <= blockLeft) {
        // Last chunk fits in the current page.
        System.arraycopy(b, pos, currentBlock, upto, remaining);
        upto += remaining;
        break;
      }
      // Fill the current page completely, commit it, and spill to a new one.
      System.arraycopy(b, pos, currentBlock, upto, blockLeft);
      addBlock(currentBlock);
      currentBlock = new byte[blockSize];
      upto = 0;
      pos += blockLeft;
      remaining -= blockLeft;
    }
  }

  /** Return the current byte position. */
  public long getPosition() {
    return getPointer();
  }
}
/** Returns a DataInput to read values from this PagedBytes instance. */
public PagedBytesDataInput getDataInput() {
  // Reading requires the pages to be immutable.
  if (frozen == false) {
    throw new IllegalStateException("must call freeze() before getDataInput");
  }
  return new PagedBytesDataInput();
}
/**
 * Returns a DataOutput that you may use to write into this PagedBytes instance. If you do this,
 * you should not call the other writing methods (eg, copy); results are undefined.
 */
public PagedBytesDataOutput getDataOutput() {
  // Writing is only legal before the pages are frozen.
  if (frozen) {
    throw new IllegalStateException("cannot get DataOutput after freeze()");
  }
  return new PagedBytesDataOutput();
}
}