lucene/core/src/java/org/apache/lucene/store/DataInput.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.store;


 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;

 import org.apache.lucene.util.BitUtil;
 import org.apache.lucene.util.FutureObjects;

 /**
  * Abstract base class for performing read operations of Lucene's low-level
  * data types.
  *
  * <p>{@code DataInput} may only be used from one thread, because it is not
  * thread safe (it keeps internal state like file position). To allow
  * multithreaded use, every {@code DataInput} instance must be cloned before
  * used in another thread. Subclasses must therefore implement {@link #clone()},
  * returning a new {@code DataInput} which operates on the same underlying
  * resource, but positioned independently.
  */
 public abstract class DataInput implements Cloneable {

   /** Size, in bytes, of the scratch buffer used by the default {@link #skipBytes(long)}. */
   private static final int SKIP_BUFFER_SIZE = 1024;

   /* This buffer is used to skip over bytes with the default implementation of
    * skipBytes. The reason why we need to use an instance member instead of
    * sharing a single instance across threads is that some delegating
    * implementations of DataInput might want to reuse the provided buffer in
    * order to eg. update the checksum. If we shared the same buffer across
    * threads, then another thread might update the buffer while the checksum is
    * being computed, making it invalid. See LUCENE-5583 for more information.
    * It is allocated lazily by skipBytes and is never handed over to clones.
    */
   private byte[] skipBuffer;

   /** Reads and returns a single byte.
    * @see DataOutput#writeByte(byte)
    */
   public abstract byte readByte() throws IOException;

   /** Reads a specified number of bytes into an array at the specified offset.
    * @param b the array to read bytes into
    * @param offset the offset in the array to start storing bytes
    * @param len the number of bytes to read
    * @see DataOutput#writeBytes(byte[],int)
    */
   public abstract void readBytes(byte[] b, int offset, int len)
     throws IOException;

   /** Reads a specified number of bytes into an array at the
    * specified offset with control over whether the read
    * should be buffered (callers who have their own buffer
    * should pass in "false" for useBuffer).  Currently only
    * {@link BufferedIndexInput} respects this parameter.
    * @param b the array to read bytes into
    * @param offset the offset in the array to start storing bytes
    * @param len the number of bytes to read
    * @param useBuffer set to false if the caller will handle
    * buffering.
    * @see DataOutput#writeBytes(byte[],int)
    */
   public void readBytes(byte[] b, int offset, int len, boolean useBuffer)
     throws IOException
   {
     // Default to ignoring useBuffer entirely
     readBytes(b, offset, len);
   }

   /** Reads two bytes and returns a short. The first byte read becomes the
    * high-order byte of the result.
    * @see DataOutput#writeShort(short)
    */
   public short readShort() throws IOException {
     return (short) (((readByte() & 0xFF) <<  8) |  (readByte() & 0xFF));
   }

   /** Reads four bytes and returns an int. The first byte read becomes the
    * highest-order byte of the result.
    * @see DataOutput#writeInt(int)
    */
   public int readInt() throws IOException {
     return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16)
          | ((readByte() & 0xFF) <<  8) |  (readByte() & 0xFF);
   }

   /** Reads an int stored in variable-length format.  Reads between one and
    * five bytes.  Smaller values take fewer bytes.  Negative numbers are
    * supported, but should be avoided.
    * <p>
    * The format is described further in {@link DataOutput#writeVInt(int)}.
    *
    * @throws IOException if the fifth byte carries bits beyond the 32 bits of an int
    * @see DataOutput#writeVInt(int)
    */
   public int readVInt() throws IOException {
     /* This is the original code of this method,
      * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
      * readByte() is inlined. So the loop was unrolled!
     byte b = readByte();
     int i = b & 0x7F;
     for (int shift = 7; (b & 0x80) != 0; shift += 7) {
       b = readByte();
       i |= (b & 0x7F) << shift;
     }
     return i;
     */
     // A non-negative byte has its high (continuation) bit clear and ends the value.
     byte b = readByte();
     if (b >= 0) return b;
     int i = b & 0x7F;
     b = readByte();
     i |= (b & 0x7F) << 7;
     if (b >= 0) return i;
     b = readByte();
     i |= (b & 0x7F) << 14;
     if (b >= 0) return i;
     b = readByte();
     i |= (b & 0x7F) << 21;
     if (b >= 0) return i;
     b = readByte();
     // Warning: the next ands use 0x0F / 0xF0 - beware copy/paste errors:
     i |= (b & 0x0F) << 28;
     if ((b & 0xF0) == 0) return i;
     throw new IOException("Invalid vInt detected (too many bits)");
   }

   /**
    * Read a {@link BitUtil#zigZagDecode(int) zig-zag}-encoded
    * {@link #readVInt() variable-length} integer.
    * @see DataOutput#writeZInt(int)
    */
   public int readZInt() throws IOException {
     return BitUtil.zigZagDecode(readVInt());
   }

   /** Reads eight bytes and returns a long. The first four bytes read form the
    * upper 32 bits of the result.
    * @see DataOutput#writeLong(long)
    */
   public long readLong() throws IOException {
     return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL);
   }

   /**
    * Read a specified number of longs with the little endian byte order.
    * <p>This method can be used to read longs whose bytes have been
    * {@link Long#reverseBytes reversed} at write time:
    * <pre class="prettyprint">
    * for (long l : longs) {
    *   output.writeLong(Long.reverseBytes(l));
    * }
    * </pre>
    * @param dst the array to store the decoded longs into
    * @param offset the index in {@code dst} of the first long to store
    * @param length the number of longs to read
    * @lucene.experimental
    */
   // TODO: LUCENE-9047: Make the entire DataInput/DataOutput API little endian
   // Then this would just be `readLongs`?
   public void readLELongs(long[] dst, int offset, int length) throws IOException {
     FutureObjects.checkFromIndexSize(offset, length, dst.length);
     for (int i = 0; i < length; ++i) {
       dst[offset + i] = Long.reverseBytes(readLong());
     }
   }

   /** Reads a long stored in variable-length format.  Reads between one and
    * nine bytes.  Smaller values take fewer bytes.  Negative numbers are not
    * supported.
    * <p>
    * The format is described further in {@link DataOutput#writeVInt(int)}.
    *
    * @throws IOException if the encoding continues past the ninth byte
    * @see DataOutput#writeVLong(long)
    */
   public long readVLong() throws IOException {
     return readVLong(false);
   }

   /**
    * Shared decoder behind {@link #readVLong()} and {@link #readZLong()}.
    *
    * @param allowNegative when {@code false}, an encoding that continues past
    *        the ninth byte is rejected; when {@code true}, a tenth byte is read
    *        and must be 0 or 1 (it supplies bit 63 of the result)
    * @throws IOException if the encoding is longer than permitted
    */
   private long readVLong(boolean allowNegative) throws IOException {
     /* This is the original code of this method,
      * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
      * readByte() is inlined. So the loop was unrolled!
     byte b = readByte();
     long i = b & 0x7F;
     for (int shift = 7; (b & 0x80) != 0; shift += 7) {
       b = readByte();
       i |= (b & 0x7FL) << shift;
     }
     return i;
     */
     // A non-negative byte has its high (continuation) bit clear and ends the value.
     byte b = readByte();
     if (b >= 0) return b;
     long i = b & 0x7FL;
     b = readByte();
     i |= (b & 0x7FL) << 7;
     if (b >= 0) return i;
     b = readByte();
     i |= (b & 0x7FL) << 14;
     if (b >= 0) return i;
     b = readByte();
     i |= (b & 0x7FL) << 21;
     if (b >= 0) return i;
     b = readByte();
     i |= (b & 0x7FL) << 28;
     if (b >= 0) return i;
     b = readByte();
     i |= (b & 0x7FL) << 35;
     if (b >= 0) return i;
     b = readByte();
     i |= (b & 0x7FL) << 42;
     if (b >= 0) return i;
     b = readByte();
     i |= (b & 0x7FL) << 49;
     if (b >= 0) return i;
     b = readByte();
     i |= (b & 0x7FL) << 56;
     if (b >= 0) return i;
     if (allowNegative) {
       b = readByte();
       // Only the lowest bit of the tenth byte fits: it becomes bit 63.
       i |= (b & 0x7FL) << 63;
       if (b == 0 || b == 1) return i;
       throw new IOException("Invalid vLong detected (more than 64 bits)");
     } else {
       throw new IOException("Invalid vLong detected (negative values disallowed)");
     }
   }

   /**
    * Read a {@link BitUtil#zigZagDecode(long) zig-zag}-encoded
    * {@link #readVLong() variable-length} integer. Reads between one and ten
    * bytes.
    * @see DataOutput#writeZLong(long)
    */
   public long readZLong() throws IOException {
     return BitUtil.zigZagDecode(readVLong(true));
   }

   /** Reads a string: a {@link #readVInt() vInt} byte length followed by that
    * many bytes, which are decoded as UTF-8.
    * @see DataOutput#writeString(String)
    */
   public String readString() throws IOException {
     int length = readVInt();
     final byte[] bytes = new byte[length];
     readBytes(bytes, 0, length);
     return new String(bytes, 0, length, StandardCharsets.UTF_8);
   }

   /** Returns a clone of this stream.
    *
    * <p>Clones of a stream access the same data, and are positioned at the same
    * point as the stream they were cloned from.
    *
    * <p>Expert: Subclasses must ensure that clones may be positioned at
    * different points in the input from each other and from the stream they
    * were cloned from.
    */
   @Override
   public DataInput clone() {
     try {
       final DataInput copy = (DataInput) super.clone();
       // Object.clone() is a shallow copy, so the clone would otherwise share
       // this instance's skip buffer. Drop the reference so that the clone
       // lazily allocates a buffer of its own in skipBytes.
       copy.skipBuffer = null;
       return copy;
     } catch (CloneNotSupportedException e) {
       // Keep the original exception as the cause for diagnosability.
       throw new Error("This cannot happen: Failing to clone DataInput", e);
     }
   }

   /**
    * Reads a Map&lt;String,String&gt; previously written
    * with {@link DataOutput#writeMapOfStrings(Map)}.
    * @return An immutable map containing the written contents.
    */
   public Map<String,String> readMapOfStrings() throws IOException {
     int count = readVInt();
     if (count == 0) {
       return Collections.emptyMap();
     } else if (count == 1) {
       return Collections.singletonMap(readString(), readString());
     } else {
       Map<String,String> map = count > 10 ? new HashMap<>() : new TreeMap<>();
       for (int i = 0; i < count; i++) {
         final String key = readString();
         final String val = readString();
         map.put(key, val);
       }
       return Collections.unmodifiableMap(map);
     }
   }

   /**
    * Reads a Set&lt;String&gt; previously written
    * with {@link DataOutput#writeSetOfStrings(Set)}.
    * @return An immutable set containing the written contents.
    */
   public Set<String> readSetOfStrings() throws IOException {
     int count = readVInt();
     if (count == 0) {
       return Collections.emptySet();
     } else if (count == 1) {
       return Collections.singleton(readString());
     } else {
       Set<String> set = count > 10 ? new HashSet<>() : new TreeSet<>();
       for (int i = 0; i < count; i++) {
         set.add(readString());
       }
       return Collections.unmodifiableSet(set);
     }
   }

   /**
    * Skip over <code>numBytes</code> bytes. The contract on this method is that it
    * should have the same behavior as reading the same number of bytes into a
    * buffer and discarding its content. Negative values of <code>numBytes</code>
    * are not supported.
    * @param numBytes the number of bytes to skip
    * @throws IllegalArgumentException if <code>numBytes</code> is negative
    */
   public void skipBytes(final long numBytes) throws IOException {
     if (numBytes < 0) {
       throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes);
     }
     if (skipBuffer == null) {
       skipBuffer = new byte[SKIP_BUFFER_SIZE];
     }
     assert skipBuffer.length == SKIP_BUFFER_SIZE;
     // Read and discard in chunks of at most SKIP_BUFFER_SIZE bytes.
     for (long skipped = 0; skipped < numBytes; ) {
       final int step = (int) Math.min(SKIP_BUFFER_SIZE, numBytes - skipped);
       readBytes(skipBuffer, 0, step, false);
       skipped += step;
     }
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.store;


	import java.io.IOException;
	import java.nio.charset.StandardCharsets;
	import java.util.Collections;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.Map;
	import java.util.Set;
	import java.util.TreeMap;
	import java.util.TreeSet;

	import org.apache.lucene.util.BitUtil;
	import org.apache.lucene.util.FutureObjects;

	/**
	* Abstract base class for performing read operations of Lucene's low-level
	* data types.
	*
	* <p>{@code DataInput} may only be used from one thread, because it is not
	* thread safe (it keeps internal state like file position). To allow
	* multithreaded use, every {@code DataInput} instance must be cloned before
	* used in another thread. Subclasses must therefore implement {@link #clone()},
	* returning a new {@code DataInput} which operates on the same underlying
	* resource, but positioned independently.
	*/
	public abstract class DataInput implements Cloneable {

	private static final int SKIP_BUFFER_SIZE = 1024;

	/* This buffer is used to skip over bytes with the default implementation of
	* skipBytes. The reason why we need to use an instance member instead of
	* sharing a single instance across threads is that some delegating
	* implementations of DataInput might want to reuse the provided buffer in
	* order to eg. update the checksum. If we shared the same buffer across
	* threads, then another thread might update the buffer while the checksum is
	* being computed, making it invalid. See LUCENE-5583 for more information.
	*/
	private byte[] skipBuffer;

	/** Reads and returns a single byte.
	* @see DataOutput#writeByte(byte)
	*/
	public abstract byte readByte() throws IOException;

	/** Reads a specified number of bytes into an array at the specified offset.
	* @param b the array to read bytes into
	* @param offset the offset in the array to start storing bytes
	* @param len the number of bytes to read
	* @see DataOutput#writeBytes(byte[],int)
	*/
	public abstract void readBytes(byte[] b, int offset, int len)
	throws IOException;

	/** Reads a specified number of bytes into an array at the
	* specified offset with control over whether the read
	* should be buffered (callers who have their own buffer
	* should pass in "false" for useBuffer). Currently only
	* {@link BufferedIndexInput} respects this parameter.
	* @param b the array to read bytes into
	* @param offset the offset in the array to start storing bytes
	* @param len the number of bytes to read
	* @param useBuffer set to false if the caller will handle
	* buffering.
	* @see DataOutput#writeBytes(byte[],int)
	*/
	public void readBytes(byte[] b, int offset, int len, boolean useBuffer)
	throws IOException
	{
	// Default to ignoring useBuffer entirely
	readBytes(b, offset, len);
	}

	/** Reads two bytes and returns a short.
	* @see DataOutput#writeByte(byte)
	*/
	public short readShort() throws IOException {
	return (short) (((readByte() & 0xFF) << 8) \| (readByte() & 0xFF));
	}

	/** Reads four bytes and returns an int.
	* @see DataOutput#writeInt(int)
	*/
	public int readInt() throws IOException {
	return ((readByte() & 0xFF) << 24) \| ((readByte() & 0xFF) << 16)
	\| ((readByte() & 0xFF) << 8) \| (readByte() & 0xFF);
	}

	/** Reads an int stored in variable-length format. Reads between one and
	* five bytes. Smaller values take fewer bytes. Negative numbers are
	* supported, but should be avoided.
	* <p>
	* The format is described further in {@link DataOutput#writeVInt(int)}.
	*
	* @see DataOutput#writeVInt(int)
	*/
	public int readVInt() throws IOException {
	/* This is the original code of this method,
	* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
	* readByte() is inlined. So the loop was unwinded!
	byte b = readByte();
	int i = b & 0x7F;
	for (int shift = 7; (b & 0x80) != 0; shift += 7) {
	b = readByte();
	i \|= (b & 0x7F) << shift;
	}
	return i;
	*/
	byte b = readByte();
	if (b >= 0) return b;
	int i = b & 0x7F;
	b = readByte();
	i \|= (b & 0x7F) << 7;
	if (b >= 0) return i;
	b = readByte();
	i \|= (b & 0x7F) << 14;
	if (b >= 0) return i;
	b = readByte();
	i \|= (b & 0x7F) << 21;
	if (b >= 0) return i;
	b = readByte();
	// Warning: the next ands use 0x0F / 0xF0 - beware copy/paste errors:
	i \|= (b & 0x0F) << 28;
	if ((b & 0xF0) == 0) return i;
	throw new IOException("Invalid vInt detected (too many bits)");
	}

	/**
	* Read a {@link BitUtil#zigZagDecode(int) zig-zag}-encoded
	* {@link #readVInt() variable-length} integer.
	* @see DataOutput#writeZInt(int)
	*/
	public int readZInt() throws IOException {
	return BitUtil.zigZagDecode(readVInt());
	}

	/** Reads eight bytes and returns a long.
	* @see DataOutput#writeLong(long)
	*/
	public long readLong() throws IOException {
	return (((long)readInt()) << 32) \| (readInt() & 0xFFFFFFFFL);
	}

	/**
	* Read a specified number of longs with the little endian byte order.
	* <p>This method can be used to read longs whose bytes have been
	* {@link Long#reverseBytes reversed} at write time:
	* <pre class="prettyprint">
	* for (long l : longs) {
	* output.writeLong(Long.reverseBytes(l));
	* }
	* </pre>
	* @lucene.experimental
	*/
	// TODO: LUCENE-9047: Make the entire DataInput/DataOutput API little endian
	// Then this would just be `readLongs`?
	public void readLELongs(long[] dst, int offset, int length) throws IOException {
	FutureObjects.checkFromIndexSize(offset, length, dst.length);
	for (int i = 0; i < length; ++i) {
	dst[offset + i] = Long.reverseBytes(readLong());
	}
	}

	/** Reads a long stored in variable-length format. Reads between one and
	* nine bytes. Smaller values take fewer bytes. Negative numbers are not
	* supported.
	* <p>
	* The format is described further in {@link DataOutput#writeVInt(int)}.
	*
	* @see DataOutput#writeVLong(long)
	*/
	public long readVLong() throws IOException {
	return readVLong(false);
	}

	private long readVLong(boolean allowNegative) throws IOException {
	/* This is the original code of this method,
	* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
	* readByte() is inlined. So the loop was unwinded!
	byte b = readByte();
	long i = b & 0x7F;
	for (int shift = 7; (b & 0x80) != 0; shift += 7) {
	b = readByte();
	i \|= (b & 0x7FL) << shift;
	}
	return i;
	*/
	byte b = readByte();
	if (b >= 0) return b;
	long i = b & 0x7FL;
	b = readByte();
	i \|= (b & 0x7FL) << 7;
	if (b >= 0) return i;
	b = readByte();
	i \|= (b & 0x7FL) << 14;
	if (b >= 0) return i;
	b = readByte();
	i \|= (b & 0x7FL) << 21;
	if (b >= 0) return i;
	b = readByte();
	i \|= (b & 0x7FL) << 28;
	if (b >= 0) return i;
	b = readByte();
	i \|= (b & 0x7FL) << 35;
	if (b >= 0) return i;
	b = readByte();
	i \|= (b & 0x7FL) << 42;
	if (b >= 0) return i;
	b = readByte();
	i \|= (b & 0x7FL) << 49;
	if (b >= 0) return i;
	b = readByte();
	i \|= (b & 0x7FL) << 56;
	if (b >= 0) return i;
	if (allowNegative) {
	b = readByte();
	i \|= (b & 0x7FL) << 63;
	if (b == 0 \|\| b == 1) return i;
	throw new IOException("Invalid vLong detected (more than 64 bits)");
	} else {
	throw new IOException("Invalid vLong detected (negative values disallowed)");
	}
	}

	/**
	* Read a {@link BitUtil#zigZagDecode(long) zig-zag}-encoded
	* {@link #readVLong() variable-length} integer. Reads between one and ten
	* bytes.
	* @see DataOutput#writeZLong(long)
	*/
	public long readZLong() throws IOException {
	return BitUtil.zigZagDecode(readVLong(true));
	}

	/** Reads a string.
	* @see DataOutput#writeString(String)
	*/
	public String readString() throws IOException {
	int length = readVInt();
	final byte[] bytes = new byte[length];
	readBytes(bytes, 0, length);
	return new String(bytes, 0, length, StandardCharsets.UTF_8);
	}

	/** Returns a clone of this stream.
	*
	* <p>Clones of a stream access the same data, and are positioned at the same
	* point as the stream they were cloned from.
	*
	* <p>Expert: Subclasses must ensure that clones may be positioned at
	* different points in the input from each other and from the stream they
	* were cloned from.
	*/
	@Override
	public DataInput clone() {
	try {
	return (DataInput) super.clone();
	} catch (CloneNotSupportedException e) {
	throw new Error("This cannot happen: Failing to clone DataInput");
	}
	}

	/**
	* Reads a Map<String,String> previously written
	* with {@link DataOutput#writeMapOfStrings(Map)}.
	* @return An immutable map containing the written contents.
	*/
	public Map<String,String> readMapOfStrings() throws IOException {
	int count = readVInt();
	if (count == 0) {
	return Collections.emptyMap();
	} else if (count == 1) {
	return Collections.singletonMap(readString(), readString());
	} else {
	Map<String,String> map = count > 10 ? new HashMap<>() : new TreeMap<>();
	for (int i = 0; i < count; i++) {
	final String key = readString();
	final String val = readString();
	map.put(key, val);
	}
	return Collections.unmodifiableMap(map);
	}
	}

	/**
	* Reads a Set<String> previously written
	* with {@link DataOutput#writeSetOfStrings(Set)}.
	* @return An immutable set containing the written contents.
	*/
	public Set<String> readSetOfStrings() throws IOException {
	int count = readVInt();
	if (count == 0) {
	return Collections.emptySet();
	} else if (count == 1) {
	return Collections.singleton(readString());
	} else {
	Set<String> set = count > 10 ? new HashSet<>() : new TreeSet<>();
	for (int i = 0; i < count; i++) {
	set.add(readString());
	}
	return Collections.unmodifiableSet(set);
	}
	}

	/**
	* Skip over <code>numBytes</code> bytes. The contract on this method is that it
	* should have the same behavior as reading the same number of bytes into a
	* buffer and discarding its content. Negative values of <code>numBytes</code>
	* are not supported.
	*/
	public void skipBytes(final long numBytes) throws IOException {
	if (numBytes < 0) {
	throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes);
	}
	if (skipBuffer == null) {
	skipBuffer = new byte[SKIP_BUFFER_SIZE];
	}
	assert skipBuffer.length == SKIP_BUFFER_SIZE;
	for (long skipped = 0; skipped < numBytes; ) {
	final int step = (int) Math.min(SKIP_BUFFER_SIZE, numBytes - skipped);
	readBytes(skipBuffer, 0, step, false);
	skipped += step;
	}
	}

	}