| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.store; |
| |
| |
| import java.io.IOException; |
| import java.nio.charset.StandardCharsets; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.TreeMap; |
| import java.util.TreeSet; |
| |
| import org.apache.lucene.util.BitUtil; |
| import org.apache.lucene.util.FutureObjects; |
| |
| /** |
| * Abstract base class for performing read operations of Lucene's low-level |
| * data types. |
| * |
| * <p>{@code DataInput} may only be used from one thread, because it is not |
| * thread safe (it keeps internal state like file position). To allow |
| * multithreaded use, every {@code DataInput} instance must be cloned before |
| * used in another thread. Subclasses must therefore implement {@link #clone()}, |
| * returning a new {@code DataInput} which operates on the same underlying |
| * resource, but positioned independently. |
| */ |
| public abstract class DataInput implements Cloneable { |
| |
| private static final int SKIP_BUFFER_SIZE = 1024; |
| |
| /* This buffer is used to skip over bytes with the default implementation of |
| * skipBytes. The reason why we need to use an instance member instead of |
| * sharing a single instance across threads is that some delegating |
| * implementations of DataInput might want to reuse the provided buffer in |
| * order to eg. update the checksum. If we shared the same buffer across |
| * threads, then another thread might update the buffer while the checksum is |
| * being computed, making it invalid. See LUCENE-5583 for more information. |
| */ |
| private byte[] skipBuffer; |
| |
| /** Reads and returns a single byte. |
| * @see DataOutput#writeByte(byte) |
| */ |
| public abstract byte readByte() throws IOException; |
| |
| /** Reads a specified number of bytes into an array at the specified offset. |
| * @param b the array to read bytes into |
| * @param offset the offset in the array to start storing bytes |
| * @param len the number of bytes to read |
| * @see DataOutput#writeBytes(byte[],int) |
| */ |
| public abstract void readBytes(byte[] b, int offset, int len) |
| throws IOException; |
| |
| /** Reads a specified number of bytes into an array at the |
| * specified offset with control over whether the read |
| * should be buffered (callers who have their own buffer |
| * should pass in "false" for useBuffer). Currently only |
| * {@link BufferedIndexInput} respects this parameter. |
| * @param b the array to read bytes into |
| * @param offset the offset in the array to start storing bytes |
| * @param len the number of bytes to read |
| * @param useBuffer set to false if the caller will handle |
| * buffering. |
| * @see DataOutput#writeBytes(byte[],int) |
| */ |
| public void readBytes(byte[] b, int offset, int len, boolean useBuffer) |
| throws IOException |
| { |
| // Default to ignoring useBuffer entirely |
| readBytes(b, offset, len); |
| } |
| |
| /** Reads two bytes and returns a short. |
| * @see DataOutput#writeByte(byte) |
| */ |
| public short readShort() throws IOException { |
| return (short) (((readByte() & 0xFF) << 8) | (readByte() & 0xFF)); |
| } |
| |
| /** Reads four bytes and returns an int. |
| * @see DataOutput#writeInt(int) |
| */ |
| public int readInt() throws IOException { |
| return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16) |
| | ((readByte() & 0xFF) << 8) | (readByte() & 0xFF); |
| } |
| |
| /** Reads an int stored in variable-length format. Reads between one and |
| * five bytes. Smaller values take fewer bytes. Negative numbers are |
| * supported, but should be avoided. |
| * <p> |
| * The format is described further in {@link DataOutput#writeVInt(int)}. |
| * |
| * @see DataOutput#writeVInt(int) |
| */ |
| public int readVInt() throws IOException { |
| /* This is the original code of this method, |
| * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if |
| * readByte() is inlined. So the loop was unwinded! |
| byte b = readByte(); |
| int i = b & 0x7F; |
| for (int shift = 7; (b & 0x80) != 0; shift += 7) { |
| b = readByte(); |
| i |= (b & 0x7F) << shift; |
| } |
| return i; |
| */ |
| byte b = readByte(); |
| if (b >= 0) return b; |
| int i = b & 0x7F; |
| b = readByte(); |
| i |= (b & 0x7F) << 7; |
| if (b >= 0) return i; |
| b = readByte(); |
| i |= (b & 0x7F) << 14; |
| if (b >= 0) return i; |
| b = readByte(); |
| i |= (b & 0x7F) << 21; |
| if (b >= 0) return i; |
| b = readByte(); |
| // Warning: the next ands use 0x0F / 0xF0 - beware copy/paste errors: |
| i |= (b & 0x0F) << 28; |
| if ((b & 0xF0) == 0) return i; |
| throw new IOException("Invalid vInt detected (too many bits)"); |
| } |
| |
| /** |
| * Read a {@link BitUtil#zigZagDecode(int) zig-zag}-encoded |
| * {@link #readVInt() variable-length} integer. |
| * @see DataOutput#writeZInt(int) |
| */ |
| public int readZInt() throws IOException { |
| return BitUtil.zigZagDecode(readVInt()); |
| } |
| |
| /** Reads eight bytes and returns a long. |
| * @see DataOutput#writeLong(long) |
| */ |
| public long readLong() throws IOException { |
| return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL); |
| } |
| |
| /** |
| * Read a specified number of longs with the little endian byte order. |
| * <p>This method can be used to read longs whose bytes have been |
| * {@link Long#reverseBytes reversed} at write time: |
| * <pre class="prettyprint"> |
| * for (long l : longs) { |
| * output.writeLong(Long.reverseBytes(l)); |
| * } |
| * </pre> |
| * @lucene.experimental |
| */ |
| // TODO: LUCENE-9047: Make the entire DataInput/DataOutput API little endian |
| // Then this would just be `readLongs`? |
| public void readLELongs(long[] dst, int offset, int length) throws IOException { |
| FutureObjects.checkFromIndexSize(offset, length, dst.length); |
| for (int i = 0; i < length; ++i) { |
| dst[offset + i] = Long.reverseBytes(readLong()); |
| } |
| } |
| |
| /** Reads a long stored in variable-length format. Reads between one and |
| * nine bytes. Smaller values take fewer bytes. Negative numbers are not |
| * supported. |
| * <p> |
| * The format is described further in {@link DataOutput#writeVInt(int)}. |
| * |
| * @see DataOutput#writeVLong(long) |
| */ |
| public long readVLong() throws IOException { |
| return readVLong(false); |
| } |
| |
| private long readVLong(boolean allowNegative) throws IOException { |
| /* This is the original code of this method, |
| * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if |
| * readByte() is inlined. So the loop was unwinded! |
| byte b = readByte(); |
| long i = b & 0x7F; |
| for (int shift = 7; (b & 0x80) != 0; shift += 7) { |
| b = readByte(); |
| i |= (b & 0x7FL) << shift; |
| } |
| return i; |
| */ |
| byte b = readByte(); |
| if (b >= 0) return b; |
| long i = b & 0x7FL; |
| b = readByte(); |
| i |= (b & 0x7FL) << 7; |
| if (b >= 0) return i; |
| b = readByte(); |
| i |= (b & 0x7FL) << 14; |
| if (b >= 0) return i; |
| b = readByte(); |
| i |= (b & 0x7FL) << 21; |
| if (b >= 0) return i; |
| b = readByte(); |
| i |= (b & 0x7FL) << 28; |
| if (b >= 0) return i; |
| b = readByte(); |
| i |= (b & 0x7FL) << 35; |
| if (b >= 0) return i; |
| b = readByte(); |
| i |= (b & 0x7FL) << 42; |
| if (b >= 0) return i; |
| b = readByte(); |
| i |= (b & 0x7FL) << 49; |
| if (b >= 0) return i; |
| b = readByte(); |
| i |= (b & 0x7FL) << 56; |
| if (b >= 0) return i; |
| if (allowNegative) { |
| b = readByte(); |
| i |= (b & 0x7FL) << 63; |
| if (b == 0 || b == 1) return i; |
| throw new IOException("Invalid vLong detected (more than 64 bits)"); |
| } else { |
| throw new IOException("Invalid vLong detected (negative values disallowed)"); |
| } |
| } |
| |
| /** |
| * Read a {@link BitUtil#zigZagDecode(long) zig-zag}-encoded |
| * {@link #readVLong() variable-length} integer. Reads between one and ten |
| * bytes. |
| * @see DataOutput#writeZLong(long) |
| */ |
| public long readZLong() throws IOException { |
| return BitUtil.zigZagDecode(readVLong(true)); |
| } |
| |
| /** Reads a string. |
| * @see DataOutput#writeString(String) |
| */ |
| public String readString() throws IOException { |
| int length = readVInt(); |
| final byte[] bytes = new byte[length]; |
| readBytes(bytes, 0, length); |
| return new String(bytes, 0, length, StandardCharsets.UTF_8); |
| } |
| |
| /** Returns a clone of this stream. |
| * |
| * <p>Clones of a stream access the same data, and are positioned at the same |
| * point as the stream they were cloned from. |
| * |
| * <p>Expert: Subclasses must ensure that clones may be positioned at |
| * different points in the input from each other and from the stream they |
| * were cloned from. |
| */ |
| @Override |
| public DataInput clone() { |
| try { |
| return (DataInput) super.clone(); |
| } catch (CloneNotSupportedException e) { |
| throw new Error("This cannot happen: Failing to clone DataInput"); |
| } |
| } |
| |
| /** |
| * Reads a Map<String,String> previously written |
| * with {@link DataOutput#writeMapOfStrings(Map)}. |
| * @return An immutable map containing the written contents. |
| */ |
| public Map<String,String> readMapOfStrings() throws IOException { |
| int count = readVInt(); |
| if (count == 0) { |
| return Collections.emptyMap(); |
| } else if (count == 1) { |
| return Collections.singletonMap(readString(), readString()); |
| } else { |
| Map<String,String> map = count > 10 ? new HashMap<>() : new TreeMap<>(); |
| for (int i = 0; i < count; i++) { |
| final String key = readString(); |
| final String val = readString(); |
| map.put(key, val); |
| } |
| return Collections.unmodifiableMap(map); |
| } |
| } |
| |
| /** |
| * Reads a Set<String> previously written |
| * with {@link DataOutput#writeSetOfStrings(Set)}. |
| * @return An immutable set containing the written contents. |
| */ |
| public Set<String> readSetOfStrings() throws IOException { |
| int count = readVInt(); |
| if (count == 0) { |
| return Collections.emptySet(); |
| } else if (count == 1) { |
| return Collections.singleton(readString()); |
| } else { |
| Set<String> set = count > 10 ? new HashSet<>() : new TreeSet<>(); |
| for (int i = 0; i < count; i++) { |
| set.add(readString()); |
| } |
| return Collections.unmodifiableSet(set); |
| } |
| } |
| |
| /** |
| * Skip over <code>numBytes</code> bytes. The contract on this method is that it |
| * should have the same behavior as reading the same number of bytes into a |
| * buffer and discarding its content. Negative values of <code>numBytes</code> |
| * are not supported. |
| */ |
| public void skipBytes(final long numBytes) throws IOException { |
| if (numBytes < 0) { |
| throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes); |
| } |
| if (skipBuffer == null) { |
| skipBuffer = new byte[SKIP_BUFFER_SIZE]; |
| } |
| assert skipBuffer.length == SKIP_BUFFER_SIZE; |
| for (long skipped = 0; skipped < numBytes; ) { |
| final int step = (int) Math.min(SKIP_BUFFER_SIZE, numBytes - skipped); |
| readBytes(skipBuffer, 0, step, false); |
| skipped += step; |
| } |
| } |
| |
| } |