lucene/core/src/java/org/apache/lucene/store/DataOutput.java - lucene-solr - Git at Google

 package org.apache.lucene.store;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.Map;
 import java.util.Set;

 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.UnicodeUtil;

 /**
  * Abstract base class for performing write operations of Lucene's low-level
  * data types.

  * <p>{@code DataOutput} may only be used from one thread, because it is not
  * thread safe (it keeps internal state like file position).
  */
 public abstract class DataOutput {

   /** Maximum number of bytes moved per iteration by {@link #copyBytes(DataInput, long)}. */
   private static final int COPY_BUFFER_SIZE = 16384;

   /** Scratch buffer for {@link #copyBytes(DataInput, long)}; allocated on first use. */
   private byte[] copyBuffer;

   /** Writes a single byte.
    * <p>
    * The most primitive data type is an eight-bit byte. Files are
    * accessed as sequences of bytes. All other data types are defined
    * as sequences of bytes, so file formats are byte-order independent.
    *
    * @param b the byte to write
    * @see DataInput#readByte()
    */
   public abstract void writeByte(byte b) throws IOException;

   /** Writes the first {@code length} bytes of an array.
    * <p>
    * Equivalent to {@code writeBytes(b, 0, length)}.
    *
    * @param b the bytes to write
    * @param length the number of bytes to write
    * @see DataInput#readBytes(byte[],int,int)
    */
   public void writeBytes(byte[] b, int length) throws IOException {
     writeBytes(b, 0, length);
   }

   /** Writes a slice of an array of bytes.
    * @param b the bytes to write
    * @param offset the offset in the byte array
    * @param length the number of bytes to write
    * @see DataInput#readBytes(byte[],int,int)
    */
   public abstract void writeBytes(byte[] b, int offset, int length) throws IOException;

   /** Writes an int as four bytes.
    * <p>
    * The 32 bits of the value are written as four bytes, high-order byte first.
    *
    * @param i the value to write
    * @see DataInput#readInt()
    */
   public void writeInt(int i) throws IOException {
     writeByte((byte)(i >> 24));
     writeByte((byte)(i >> 16));
     writeByte((byte)(i >>  8));
     writeByte((byte) i);
   }

   /** Writes a short as two bytes, high-order byte first.
    * @param i the value to write
    * @see DataInput#readShort()
    */
   public void writeShort(short i) throws IOException {
     writeByte((byte)(i >>  8));
     writeByte((byte) i);
   }

   /** Writes an int in a variable-length format.  Writes between one and
    * five bytes.  Smaller values take fewer bytes.  Negative numbers are
    * supported, but should be avoided (they always take five bytes).
    * <p>VByte is a variable-length format for positive integers, defined such that the
    * high-order bit of each byte indicates whether more bytes remain to be read. The
    * low-order seven bits are appended as increasingly more significant bits in the
    * resulting integer value. Thus values from zero to 127 may be stored in a single
    * byte, values from 128 to 16,383 may be stored in two bytes, and so on.</p>
    * <p>VByte Encoding Example</p>
    * <table cellspacing="0" cellpadding="2" border="0">
    * <col width="64*">
    * <col width="64*">
    * <col width="64*">
    * <col width="64*">
    * <tr valign="top">
    *   <th align="left" width="25%">Value</th>
    *   <th align="left" width="25%">Byte 1</th>
    *   <th align="left" width="25%">Byte 2</th>
    *   <th align="left" width="25%">Byte 3</th>
    * </tr>
    * <tr valign="bottom">
    *   <td width="25%">0</td>
    *   <td width="25%"><kbd>00000000</kbd></td>
    *   <td width="25%"></td>
    *   <td width="25%"></td>
    * </tr>
    * <tr valign="bottom">
    *   <td width="25%">1</td>
    *   <td width="25%"><kbd>00000001</kbd></td>
    *   <td width="25%"></td>
    *   <td width="25%"></td>
    * </tr>
    * <tr valign="bottom">
    *   <td width="25%">2</td>
    *   <td width="25%"><kbd>00000010</kbd></td>
    *   <td width="25%"></td>
    *   <td width="25%"></td>
    * </tr>
    * <tr>
    *   <td valign="top" width="25%">...</td>
    *   <td valign="bottom" width="25%"></td>
    *   <td valign="bottom" width="25%"></td>
    *   <td valign="bottom" width="25%"></td>
    * </tr>
    * <tr valign="bottom">
    *   <td width="25%">127</td>
    *   <td width="25%"><kbd>01111111</kbd></td>
    *   <td width="25%"></td>
    *   <td width="25%"></td>
    * </tr>
    * <tr valign="bottom">
    *   <td width="25%">128</td>
    *   <td width="25%"><kbd>10000000</kbd></td>
    *   <td width="25%"><kbd>00000001</kbd></td>
    *   <td width="25%"></td>
    * </tr>
    * <tr valign="bottom">
    *   <td width="25%">129</td>
    *   <td width="25%"><kbd>10000001</kbd></td>
    *   <td width="25%"><kbd>00000001</kbd></td>
    *   <td width="25%"></td>
    * </tr>
    * <tr valign="bottom">
    *   <td width="25%">130</td>
    *   <td width="25%"><kbd>10000010</kbd></td>
    *   <td width="25%"><kbd>00000001</kbd></td>
    *   <td width="25%"></td>
    * </tr>
    * <tr>
    *   <td valign="top" width="25%">...</td>
    *   <td width="25%"></td>
    *   <td width="25%"></td>
    *   <td width="25%"></td>
    * </tr>
    * <tr valign="bottom">
    *   <td width="25%">16,383</td>
    *   <td width="25%"><kbd>11111111</kbd></td>
    *   <td width="25%"><kbd>01111111</kbd></td>
    *   <td width="25%"></td>
    * </tr>
    * <tr valign="bottom">
    *   <td width="25%">16,384</td>
    *   <td width="25%"><kbd>10000000</kbd></td>
    *   <td width="25%"><kbd>10000000</kbd></td>
    *   <td width="25%"><kbd>00000001</kbd></td>
    * </tr>
    * <tr valign="bottom">
    *   <td width="25%">16,385</td>
    *   <td width="25%"><kbd>10000001</kbd></td>
    *   <td width="25%"><kbd>10000000</kbd></td>
    *   <td width="25%"><kbd>00000001</kbd></td>
    * </tr>
    * <tr>
    *   <td valign="top" width="25%">...</td>
    *   <td valign="bottom" width="25%"></td>
    *   <td valign="bottom" width="25%"></td>
    *   <td valign="bottom" width="25%"></td>
    * </tr>
    * </table>
    * <p>This provides compression while still being efficient to decode.</p>
    *
    * @param i Smaller values take fewer bytes.  Negative numbers are
    * supported, but should be avoided.
    * @throws IOException If there is an I/O error writing to the underlying medium.
    * @see DataInput#readVInt()
    */
   public final void writeVInt(int i) throws IOException {
     // Emit seven payload bits per byte, least-significant group first; the
     // 0x80 flag marks every byte except the last one.
     while ((i & ~0x7F) != 0) {
       writeByte((byte)((i & 0x7F) | 0x80));
       i >>>= 7; // unsigned shift so that negative inputs terminate
     }
     writeByte((byte)i);
   }

   /** Writes a long as eight bytes.
    * <p>
    * The 64 bits of the value are written as eight bytes, high-order byte first
    * (the upper 32 bits via {@link #writeInt(int)}, then the lower 32 bits).
    *
    * @param i the value to write
    * @see DataInput#readLong()
    */
   public void writeLong(long i) throws IOException {
     writeInt((int) (i >> 32));
     writeInt((int) i);
   }

   /** Writes a long in a variable-length format.  Writes between one and nine
    * bytes.  Smaller values take fewer bytes.  Negative numbers are not
    * supported.
    * <p>
    * The format is described further in {@link DataOutput#writeVInt(int)}.
    *
    * @param i the non-negative value to write
    * @throws IllegalArgumentException if {@code i} is negative
    * @see DataInput#readVLong()
    */
   public final void writeVLong(long i) throws IOException {
     // This used to be an assert only: with assertions disabled a negative
     // value was silently encoded as ten bytes, outside the documented format.
     if (i < 0L) {
       throw new IllegalArgumentException("cannot write negative vLong (got: " + i + ")");
     }
     while ((i & ~0x7FL) != 0L) {
       writeByte((byte)((i & 0x7FL) | 0x80L));
       i >>>= 7;
     }
     writeByte((byte)i);
   }

   /** Writes a string.
    * <p>
    * Writes strings as UTF-8 encoded bytes. First the length, in bytes, is
    * written as a {@link #writeVInt VInt}, followed by the bytes.
    *
    * @param s the string to write; must not be {@code null}
    * @see DataInput#readString()
    */
   public void writeString(String s) throws IOException {
     final BytesRef utf8Result = new BytesRef(10);
     UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8Result);
     writeVInt(utf8Result.length);
     writeBytes(utf8Result.bytes, 0, utf8Result.length);
   }

   /** Copy numBytes bytes from input to ourself.
    * <p>
    * Bytes are moved through an internal buffer, at most
    * {@code COPY_BUFFER_SIZE} bytes at a time.
    *
    * @param input the source to read from
    * @param numBytes the number of bytes to copy; must not be negative
    *        (checked by an assertion only)
    */
   public void copyBytes(DataInput input, long numBytes) throws IOException {
     assert numBytes >= 0: "numBytes=" + numBytes;
     if (copyBuffer == null) {
       copyBuffer = new byte[COPY_BUFFER_SIZE];
     }
     long left = numBytes;
     while (left > 0) {
       // The result is bounded by COPY_BUFFER_SIZE, so the narrowing cast is safe.
       final int toCopy = (int) Math.min(left, COPY_BUFFER_SIZE);
       input.readBytes(copyBuffer, 0, toCopy);
       writeBytes(copyBuffer, 0, toCopy);
       left -= toCopy;
     }
   }

   /**
    * Writes a String map.
    * <p>
    * First the size is written as an {@link #writeInt(int) Int32},
    * followed by each key-value pair written as two consecutive
    * {@link #writeString(String) String}s.
    *
    * @param map Input map. May be null (equivalent to an empty map)
    */
   public void writeStringStringMap(Map<String,String> map) throws IOException {
     if (map == null) {
       writeInt(0);
       return;
     }
     writeInt(map.size());
     for (final Map.Entry<String, String> entry : map.entrySet()) {
       writeString(entry.getKey());
       writeString(entry.getValue());
     }
   }

   /**
    * Writes a String set.
    * <p>
    * First the size is written as an {@link #writeInt(int) Int32},
    * followed by each value written as a
    * {@link #writeString(String) String}.
    *
    * @param set Input set. May be null (equivalent to an empty set)
    */
   public void writeStringSet(Set<String> set) throws IOException {
     if (set == null) {
       writeInt(0);
       return;
     }
     writeInt(set.size());
     for (String value : set) {
       writeString(value);
     }
   }
 }
	package org.apache.lucene.store;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.Map;
	import java.util.Set;

	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.UnicodeUtil;

	/**
	* Abstract base class for performing write operations of Lucene's low-level
	* data types.

	* <p>{@code DataOutput} may only be used from one thread, because it is not
	* thread safe (it keeps internal state like file position).
	*/
	public abstract class DataOutput {

	/** Writes a single byte.
	* <p>
	* The most primitive data type is an eight-bit byte. Files are
	* accessed as sequences of bytes. All other data types are defined
	* as sequences of bytes, so file formats are byte-order independent.
	*
	* @see IndexInput#readByte()
	*/
	public abstract void writeByte(byte b) throws IOException;

	/** Writes an array of bytes.
	* @param b the bytes to write
	* @param length the number of bytes to write
	* @see DataInput#readBytes(byte[],int,int)
	*/
	public void writeBytes(byte[] b, int length) throws IOException {
	writeBytes(b, 0, length);
	}

	/** Writes an array of bytes.
	* @param b the bytes to write
	* @param offset the offset in the byte array
	* @param length the number of bytes to write
	* @see DataInput#readBytes(byte[],int,int)
	*/
	public abstract void writeBytes(byte[] b, int offset, int length) throws IOException;

	/** Writes an int as four bytes.
	* <p>
	* 32-bit unsigned integer written as four bytes, high-order bytes first.
	*
	* @see DataInput#readInt()
	*/
	public void writeInt(int i) throws IOException {
	writeByte((byte)(i >> 24));
	writeByte((byte)(i >> 16));
	writeByte((byte)(i >> 8));
	writeByte((byte) i);
	}

	/** Writes a short as two bytes.
	* @see DataInput#readShort()
	*/
	public void writeShort(short i) throws IOException {
	writeByte((byte)(i >> 8));
	writeByte((byte) i);
	}

	/** Writes an int in a variable-length format. Writes between one and
	* five bytes. Smaller values take fewer bytes. Negative numbers are
	* supported, but should be avoided.
	* <p>VByte is a variable-length format for positive integers is defined where the
	* high-order bit of each byte indicates whether more bytes remain to be read. The
	* low-order seven bits are appended as increasingly more significant bits in the
	* resulting integer value. Thus values from zero to 127 may be stored in a single
	* byte, values from 128 to 16,383 may be stored in two bytes, and so on.</p>
	* <p>VByte Encoding Example</p>
	* <table cellspacing="0" cellpadding="2" border="0">
	* <col width="64*">
	* <col width="64*">
	* <col width="64*">
	* <col width="64*">
	* <tr valign="top">
	* <th align="left" width="25%">Value</th>
	* <th align="left" width="25%">Byte 1</th>
	* <th align="left" width="25%">Byte 2</th>
	* <th align="left" width="25%">Byte 3</th>
	* </tr>
	* <tr valign="bottom">
	* <td width="25%">0</td>
	* <td width="25%"><kbd>00000000</kbd></td>
	* <td width="25%"></td>
	* <td width="25%"></td>
	* </tr>
	* <tr valign="bottom">
	* <td width="25%">1</td>
	* <td width="25%"><kbd>00000001</kbd></td>
	* <td width="25%"></td>
	* <td width="25%"></td>
	* </tr>
	* <tr valign="bottom">
	* <td width="25%">2</td>
	* <td width="25%"><kbd>00000010</kbd></td>
	* <td width="25%"></td>
	* <td width="25%"></td>
	* </tr>
	* <tr>
	* <td valign="top" width="25%">...</td>
	* <td valign="bottom" width="25%"></td>
	* <td valign="bottom" width="25%"></td>
	* <td valign="bottom" width="25%"></td>
	* </tr>
	* <tr valign="bottom">
	* <td width="25%">127</td>
	* <td width="25%"><kbd>01111111</kbd></td>
	* <td width="25%"></td>
	* <td width="25%"></td>
	* </tr>
	* <tr valign="bottom">
	* <td width="25%">128</td>
	* <td width="25%"><kbd>10000000</kbd></td>
	* <td width="25%"><kbd>00000001</kbd></td>
	* <td width="25%"></td>
	* </tr>
	* <tr valign="bottom">
	* <td width="25%">129</td>
	* <td width="25%"><kbd>10000001</kbd></td>
	* <td width="25%"><kbd>00000001</kbd></td>
	* <td width="25%"></td>
	* </tr>
	* <tr valign="bottom">
	* <td width="25%">130</td>
	* <td width="25%"><kbd>10000010</kbd></td>
	* <td width="25%"><kbd>00000001</kbd></td>
	* <td width="25%"></td>
	* </tr>
	* <tr>
	* <td valign="top" width="25%">...</td>
	* <td width="25%"></td>
	* <td width="25%"></td>
	* <td width="25%"></td>
	* </tr>
	* <tr valign="bottom">
	* <td width="25%">16,383</td>
	* <td width="25%"><kbd>11111111</kbd></td>
	* <td width="25%"><kbd>01111111</kbd></td>
	* <td width="25%"></td>
	* </tr>
	* <tr valign="bottom">
	* <td width="25%">16,384</td>
	* <td width="25%"><kbd>10000000</kbd></td>
	* <td width="25%"><kbd>10000000</kbd></td>
	* <td width="25%"><kbd>00000001</kbd></td>
	* </tr>
	* <tr valign="bottom">
	* <td width="25%">16,385</td>
	* <td width="25%"><kbd>10000001</kbd></td>
	* <td width="25%"><kbd>10000000</kbd></td>
	* <td width="25%"><kbd>00000001</kbd></td>
	* </tr>
	* <tr>
	* <td valign="top" width="25%">...</td>
	* <td valign="bottom" width="25%"></td>
	* <td valign="bottom" width="25%"></td>
	* <td valign="bottom" width="25%"></td>
	* </tr>
	* </table>
	* <p>This provides compression while still being efficient to decode.</p>
	*
	* @param i Smaller values take fewer bytes. Negative numbers are
	* supported, but should be avoided.
	* @throws IOException If there is an I/O error writing to the underlying medium.
	* @see DataInput#readVInt()
	*/
	public final void writeVInt(int i) throws IOException {
	while ((i & ~0x7F) != 0) {
	writeByte((byte)((i & 0x7F) \| 0x80));
	i >>>= 7;
	}
	writeByte((byte)i);
	}

	/** Writes a long as eight bytes.
	* <p>
	* 64-bit unsigned integer written as eight bytes, high-order bytes first.
	*
	* @see DataInput#readLong()
	*/
	public void writeLong(long i) throws IOException {
	writeInt((int) (i >> 32));
	writeInt((int) i);
	}

	/** Writes an long in a variable-length format. Writes between one and nine
	* bytes. Smaller values take fewer bytes. Negative numbers are not
	* supported.
	* <p>
	* The format is described further in {@link DataOutput#writeVInt(int)}.
	* @see DataInput#readVLong()
	*/
	public final void writeVLong(long i) throws IOException {
	assert i >= 0L;
	while ((i & ~0x7FL) != 0L) {
	writeByte((byte)((i & 0x7FL) \| 0x80L));
	i >>>= 7;
	}
	writeByte((byte)i);
	}

	/** Writes a string.
	* <p>
	* Writes strings as UTF-8 encoded bytes. First the length, in bytes, is
	* written as a {@link #writeVInt VInt}, followed by the bytes.
	*
	* @see DataInput#readString()
	*/
	public void writeString(String s) throws IOException {
	final BytesRef utf8Result = new BytesRef(10);
	UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8Result);
	writeVInt(utf8Result.length);
	writeBytes(utf8Result.bytes, 0, utf8Result.length);
	}

	private static int COPY_BUFFER_SIZE = 16384;
	private byte[] copyBuffer;

	/** Copy numBytes bytes from input to ourself. */
	public void copyBytes(DataInput input, long numBytes) throws IOException {
	assert numBytes >= 0: "numBytes=" + numBytes;
	long left = numBytes;
	if (copyBuffer == null)
	copyBuffer = new byte[COPY_BUFFER_SIZE];
	while(left > 0) {
	final int toCopy;
	if (left > COPY_BUFFER_SIZE)
	toCopy = COPY_BUFFER_SIZE;
	else
	toCopy = (int) left;
	input.readBytes(copyBuffer, 0, toCopy);
	writeBytes(copyBuffer, 0, toCopy);
	left -= toCopy;
	}
	}

	/**
	* Writes a String map.
	* <p>
	* First the size is written as an {@link #writeInt(int) Int32},
	* followed by each key-value pair written as two consecutive
	* {@link #writeString(String) String}s.
	*
	* @param map Input map. May be null (equivalent to an empty map)
	*/
	public void writeStringStringMap(Map<String,String> map) throws IOException {
	if (map == null) {
	writeInt(0);
	} else {
	writeInt(map.size());
	for(final Map.Entry<String, String> entry: map.entrySet()) {
	writeString(entry.getKey());
	writeString(entry.getValue());
	}
	}
	}

	/**
	* Writes a String set.
	* <p>
	* First the size is written as an {@link #writeInt(int) Int32},
	* followed by each value written as a
	* {@link #writeString(String) String}.
	*
	* @param set Input set. May be null (equivalent to an empty set)
	*/
	public void writeStringSet(Set<String> set) throws IOException {
	if (set == null) {
	writeInt(0);
	} else {
	writeInt(set.size());
	for(String value : set) {
	writeString(value);
	}
	}
	}
	}