src/main/java/com/sleepycat/util/UtfOps.java - doris-thirdparty - Git at Google

 /*-
  * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
  *
  * This file was distributed by Oracle as part of a version of Oracle Berkeley
  * DB Java Edition made available at:
  *
  * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
  *
  * Please see the LICENSE file included in the top-level directory of the
  * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
  * license and additional information.
  */

 package com.sleepycat.util;

 /**
  * UTF operations with more flexibility than is provided by DataInput and
  * DataOutput.
  *
  * <p>Each Java {@code char} is converted independently of its neighbours:
  * values 0x0001-0x007F occupy one byte, zero and values 0x0080-0x07FF occupy
  * two bytes, and all larger values occupy three bytes.  Because zero is
  * written as two bytes, data produced by this class never contains a zero
  * byte, so a zero byte can be used as a string terminator.</p>
  *
  * @author Mark Hayes
  */
 public class UtfOps {

     /** Shared result for converting an empty string. */
     private static final byte[] EMPTY_BYTES = {};

     /** Shared result for converting zero bytes. */
     private static final String EMPTY_STRING = "";

     /**
      * Returns the byte length of a null terminated UTF string, not including
      * the terminator.
      *
      * @param bytes the data containing the UTF string.
      *
      * @param offset the beginning of the string to measure.
      *
      * @throws IndexOutOfBoundsException if no zero terminator is found.
      *
      * @return the number of bytes.
      */
     public static int getZeroTerminatedByteLength(byte[] bytes, int offset)
         throws IndexOutOfBoundsException {

         int end = offset;
         /* Running off the array raises the documented exception. */
         while (bytes[end] != 0) {
             end++;
         }
         return end - offset;
     }

     /**
      * Returns the byte length of the UTF string that would be created by
      * converting the given characters to UTF.
      *
      * @param chars the characters that would be converted.
      *
      * @return the byte length of the equivalent UTF data.
      */
     public static int getByteLength(char[] chars) {

         return getByteLength(chars, 0, chars.length);
     }

     /**
      * Returns the byte length of the UTF string that would be created by
      * converting the given characters to UTF.
      *
      * @param chars the characters that would be converted.
      *
      * @param offset the first character to be converted.
      *
      * @param length the number of characters to be converted.
      *
      * @return the byte length of the equivalent UTF data.
      */
     public static int getByteLength(char[] chars, int offset, int length) {

         int byteLen = 0;
         final int end = offset + length;
         for (int i = offset; i < end; i++) {
             byteLen += encodedSize(chars[i]);
         }
         return byteLen;
     }

     /**
      * Returns the number of bytes used to encode a single character: one for
      * 0x0001-0x007F, three for values above 0x07FF, and two otherwise
      * (including zero).
      */
     private static int encodedSize(int c) {

         if ((c >= 0x0001) && (c <= 0x007F)) {
             return 1;
         }
         return (c > 0x07FF) ? 3 : 2;
     }

     /**
      * Returns the number of characters represented by the given UTF string.
      *
      * @param bytes the UTF string.
      *
      * @return the number of characters.
      *
      * @throws IndexOutOfBoundsException if a UTF character sequence at the end
      * of the data is not complete.
      *
      * @throws IllegalArgumentException if an illegal UTF sequence is
      * encountered.
      */
     public static int getCharLength(byte[] bytes)
         throws IllegalArgumentException, IndexOutOfBoundsException {

         return getCharLength(bytes, 0, bytes.length);
     }

     /**
      * Returns the number of characters represented by the given UTF string.
      *
      * <p>Only the first byte of each sequence is inspected; continuation
      * bytes are skipped without being validated.</p>
      *
      * @param bytes the data containing the UTF string.
      *
      * @param offset the first byte to be converted.
      *
      * @param length the number of bytes to be converted.
      *
      * @return the number of characters.
      *
      * @throws IndexOutOfBoundsException if a UTF character sequence at the end
      * of the data is not complete, i.e. it would extend past
      * {@code offset + length}.
      *
      * @throws IllegalArgumentException if the first byte of a sequence is not
      * a legal lead byte.
      */
     public static int getCharLength(byte[] bytes, int offset, int length)
         throws IllegalArgumentException, IndexOutOfBoundsException {

         int charCount = 0;
         final int end = offset + length;
         while (offset < end) {
             final int lead = bytes[offset] & 0xff;
             switch (lead >> 4) {
             case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                 offset++;
                 break;
             case 12: case 13:
                 offset += 2;
                 break;
             case 14:
                 offset += 3;
                 break;
             default:
                 throw new IllegalArgumentException(
                     "Illegal UTF lead byte 0x" + Integer.toHexString(lead) +
                     " at offset " + offset);
             }
             /*
              * A multi-byte sequence that starts inside the range but ends
              * outside it is truncated; report it rather than counting a
              * character whose bytes are not all part of the input.
              */
             if (offset > end) {
                 throw new IndexOutOfBoundsException(
                     "Incomplete UTF sequence at end of data");
             }
             charCount++;
         }
         return charCount;
     }

     /**
      * Converts byte arrays into character arrays.
      *
      * @param bytes the source byte data to convert
      *
      * @param byteOffset the offset into the byte array at which
      * to start the conversion
      *
      * @param chars the destination array
      *
      * @param charOffset the offset into chars at which to begin the copy
      *
      * @param len the amount of information to copy into chars
      *
      * @param isByteLen if true then len is a measure of bytes, otherwise
      * len is a measure of characters
      *
      * @return the byte offset after converting the bytes.
      *
      * @throws IndexOutOfBoundsException if a UTF character sequence at the end
      * of the data is not complete.
      *
      * @throws IllegalArgumentException if an illegal UTF sequence is
      * encountered.
      */
     public static int bytesToChars(byte[] bytes, int byteOffset,
                                    char[] chars, int charOffset,
                                    int len, boolean isByteLen)
         throws IllegalArgumentException, IndexOutOfBoundsException {

         /* Exclusive limit, in bytes or in chars depending on isByteLen. */
         final int end = len + (isByteLen ? byteOffset : charOffset);
         while ((isByteLen ? byteOffset : charOffset) < end) {
             final int char1 = bytes[byteOffset++] & 0xff;
             final int char2;
             final int char3;
             switch (char1 >> 4) {
             case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                 /* Single byte: 0xxxxxxx. */
                 chars[charOffset++] = (char) char1;
                 break;
             case 12: case 13:
                 /* Two bytes: 110xxxxx 10xxxxxx. */
                 requireRemaining(isByteLen, end - byteOffset, 1);
                 char2 = bytes[byteOffset++];
                 if ((char2 & 0xC0) != 0x80) {
                     throw new IllegalArgumentException(
                         "Illegal UTF continuation byte before offset " +
                         byteOffset);
                 }
                 chars[charOffset++] = (char) (((char1 & 0x1F) << 6) |
                                               (char2 & 0x3F));
                 break;
             case 14:
                 /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. */
                 requireRemaining(isByteLen, end - byteOffset, 2);
                 char2 = bytes[byteOffset++];
                 char3 = bytes[byteOffset++];
                 if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
                     throw new IllegalArgumentException(
                         "Illegal UTF continuation byte before offset " +
                         byteOffset);
                 }
                 chars[charOffset++] = (char) (((char1 & 0x0F) << 12) |
                                               ((char2 & 0x3F) << 6) |
                                               (char3 & 0x3F));
                 break;
             default:
                 throw new IllegalArgumentException(
                     "Illegal UTF lead byte 0x" + Integer.toHexString(char1) +
                     " at offset " + (byteOffset - 1));
             }
         }
         return byteOffset;
     }

     /**
      * Rejects a multi-byte sequence whose continuation bytes lie beyond the
      * requested byte range.  Does nothing when the limit is not enforced,
      * which is the case when the caller's length is measured in characters.
      */
     private static void requireRemaining(boolean enforce,
                                          int available,
                                          int needed) {
         if (enforce && (available < needed)) {
             throw new IndexOutOfBoundsException(
                 "Incomplete UTF sequence at end of data");
         }
     }

     /**
      * Converts character arrays into byte arrays.
      *
      * <p>The destination must have room for the number of bytes reported by
      * {@link #getByteLength(char[], int, int)} for the same characters.</p>
      *
      * @param chars the source character data to convert
      *
      * @param charOffset the offset into the character array at which
      * to start the conversion
      *
      * @param bytes the destination array
      *
      * @param byteOffset the offset into bytes at which to begin the copy
      *
      * @param charLength the length of characters to copy into bytes
      */
     public static void charsToBytes(char[] chars, int charOffset,
                                     byte[] bytes, int byteOffset,
                                     int charLength) {
         final int charEnd = charOffset + charLength;
         for (int i = charOffset; i < charEnd; i++) {
             final int c = chars[i];
             if ((c >= 0x0001) && (c <= 0x007F)) {
                 bytes[byteOffset++] = (byte) c;
             } else if (c > 0x07FF) {
                 bytes[byteOffset++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                 bytes[byteOffset++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                 bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
             } else {
                 /* Zero lands here, so it is never written as a zero byte. */
                 bytes[byteOffset++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
                 bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
             }
         }
     }

     /**
      * Converts byte arrays into strings.
      *
      * @param bytes the source byte data to convert
      *
      * @param offset the offset into the byte array at which
      * to start the conversion
      *
      * @param length the number of bytes to be converted.
      *
      * @return the string.
      *
      * @throws IndexOutOfBoundsException if a UTF character sequence at the end
      * of the data is not complete.
      *
      * @throws IllegalArgumentException if an illegal UTF sequence is
      * encountered.
      */
     public static String bytesToString(byte[] bytes, int offset, int length)
         throws IllegalArgumentException, IndexOutOfBoundsException {

         if (length == 0) {
             return EMPTY_STRING;
         }
         /* First pass sizes the buffer, second pass decodes into it. */
         final char[] chars = new char[getCharLength(bytes, offset, length)];
         bytesToChars(bytes, offset, chars, 0, length, true);
         return new String(chars);
     }

     /**
      * Converts strings to byte arrays.
      *
      * @param string the string to convert.
      *
      * @return the UTF byte array.
      */
     public static byte[] stringToBytes(String string) {

         if (string.length() == 0) {
             return EMPTY_BYTES;
         }
         final char[] chars = string.toCharArray();
         final byte[] bytes = new byte[getByteLength(chars)];
         charsToBytes(chars, 0, bytes, 0, chars.length);
         return bytes;
     }
 }
	/*-
	* Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
	*
	* This file was distributed by Oracle as part of a version of Oracle Berkeley
	* DB Java Edition made available at:
	*
	* http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
	*
	* Please see the LICENSE file included in the top-level directory of the
	* appropriate version of Oracle Berkeley DB Java Edition for a copy of the
	* license and additional information.
	*/

	package com.sleepycat.util;

	/**
	 * UTF operations with more flexibility than is provided by DataInput and
	 * DataOutput.
	 *
	 * <p>Each Java {@code char} is converted independently of its neighbours:
	 * values 0x0001-0x007F occupy one byte, zero and values 0x0080-0x07FF occupy
	 * two bytes, and all larger values occupy three bytes.  Because zero is
	 * written as two bytes, data produced by this class never contains a zero
	 * byte, so a zero byte can be used as a string terminator.</p>
	 *
	 * @author Mark Hayes
	 */
	public class UtfOps {

	    /** Shared result for converting an empty string. */
	    private static final byte[] EMPTY_BYTES = {};

	    /** Shared result for converting zero bytes. */
	    private static final String EMPTY_STRING = "";

	    /**
	     * Returns the byte length of a null terminated UTF string, not including
	     * the terminator.
	     *
	     * @param bytes the data containing the UTF string.
	     *
	     * @param offset the beginning of the string to measure.
	     *
	     * @throws IndexOutOfBoundsException if no zero terminator is found.
	     *
	     * @return the number of bytes.
	     */
	    public static int getZeroTerminatedByteLength(byte[] bytes, int offset)
	        throws IndexOutOfBoundsException {

	        int end = offset;
	        /* Running off the array raises the documented exception. */
	        while (bytes[end] != 0) {
	            end++;
	        }
	        return end - offset;
	    }

	    /**
	     * Returns the byte length of the UTF string that would be created by
	     * converting the given characters to UTF.
	     *
	     * @param chars the characters that would be converted.
	     *
	     * @return the byte length of the equivalent UTF data.
	     */
	    public static int getByteLength(char[] chars) {

	        return getByteLength(chars, 0, chars.length);
	    }

	    /**
	     * Returns the byte length of the UTF string that would be created by
	     * converting the given characters to UTF.
	     *
	     * @param chars the characters that would be converted.
	     *
	     * @param offset the first character to be converted.
	     *
	     * @param length the number of characters to be converted.
	     *
	     * @return the byte length of the equivalent UTF data.
	     */
	    public static int getByteLength(char[] chars, int offset, int length) {

	        int byteLen = 0;
	        final int end = offset + length;
	        for (int i = offset; i < end; i++) {
	            byteLen += encodedSize(chars[i]);
	        }
	        return byteLen;
	    }

	    /**
	     * Returns the number of bytes used to encode a single character: one for
	     * 0x0001-0x007F, three for values above 0x07FF, and two otherwise
	     * (including zero).
	     */
	    private static int encodedSize(int c) {

	        if ((c >= 0x0001) && (c <= 0x007F)) {
	            return 1;
	        }
	        return (c > 0x07FF) ? 3 : 2;
	    }

	    /**
	     * Returns the number of characters represented by the given UTF string.
	     *
	     * @param bytes the UTF string.
	     *
	     * @return the number of characters.
	     *
	     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
	     * of the data is not complete.
	     *
	     * @throws IllegalArgumentException if an illegal UTF sequence is
	     * encountered.
	     */
	    public static int getCharLength(byte[] bytes)
	        throws IllegalArgumentException, IndexOutOfBoundsException {

	        return getCharLength(bytes, 0, bytes.length);
	    }

	    /**
	     * Returns the number of characters represented by the given UTF string.
	     *
	     * <p>Only the first byte of each sequence is inspected; continuation
	     * bytes are skipped without being validated.</p>
	     *
	     * @param bytes the data containing the UTF string.
	     *
	     * @param offset the first byte to be converted.
	     *
	     * @param length the number of bytes to be converted.
	     *
	     * @return the number of characters.
	     *
	     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
	     * of the data is not complete, i.e. it would extend past
	     * {@code offset + length}.
	     *
	     * @throws IllegalArgumentException if the first byte of a sequence is not
	     * a legal lead byte.
	     */
	    public static int getCharLength(byte[] bytes, int offset, int length)
	        throws IllegalArgumentException, IndexOutOfBoundsException {

	        int charCount = 0;
	        final int end = offset + length;
	        while (offset < end) {
	            final int lead = bytes[offset] & 0xff;
	            switch (lead >> 4) {
	            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
	                offset++;
	                break;
	            case 12: case 13:
	                offset += 2;
	                break;
	            case 14:
	                offset += 3;
	                break;
	            default:
	                throw new IllegalArgumentException(
	                    "Illegal UTF lead byte 0x" + Integer.toHexString(lead) +
	                    " at offset " + offset);
	            }
	            /*
	             * A multi-byte sequence that starts inside the range but ends
	             * outside it is truncated; report it rather than counting a
	             * character whose bytes are not all part of the input.
	             */
	            if (offset > end) {
	                throw new IndexOutOfBoundsException(
	                    "Incomplete UTF sequence at end of data");
	            }
	            charCount++;
	        }
	        return charCount;
	    }

	    /**
	     * Converts byte arrays into character arrays.
	     *
	     * @param bytes the source byte data to convert
	     *
	     * @param byteOffset the offset into the byte array at which
	     * to start the conversion
	     *
	     * @param chars the destination array
	     *
	     * @param charOffset the offset into chars at which to begin the copy
	     *
	     * @param len the amount of information to copy into chars
	     *
	     * @param isByteLen if true then len is a measure of bytes, otherwise
	     * len is a measure of characters
	     *
	     * @return the byte offset after converting the bytes.
	     *
	     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
	     * of the data is not complete.
	     *
	     * @throws IllegalArgumentException if an illegal UTF sequence is
	     * encountered.
	     */
	    public static int bytesToChars(byte[] bytes, int byteOffset,
	                                   char[] chars, int charOffset,
	                                   int len, boolean isByteLen)
	        throws IllegalArgumentException, IndexOutOfBoundsException {

	        /* Exclusive limit, in bytes or in chars depending on isByteLen. */
	        final int end = len + (isByteLen ? byteOffset : charOffset);
	        while ((isByteLen ? byteOffset : charOffset) < end) {
	            final int char1 = bytes[byteOffset++] & 0xff;
	            final int char2;
	            final int char3;
	            switch (char1 >> 4) {
	            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
	                /* Single byte: 0xxxxxxx. */
	                chars[charOffset++] = (char) char1;
	                break;
	            case 12: case 13:
	                /* Two bytes: 110xxxxx 10xxxxxx. */
	                requireRemaining(isByteLen, end - byteOffset, 1);
	                char2 = bytes[byteOffset++];
	                if ((char2 & 0xC0) != 0x80) {
	                    throw new IllegalArgumentException(
	                        "Illegal UTF continuation byte before offset " +
	                        byteOffset);
	                }
	                chars[charOffset++] = (char) (((char1 & 0x1F) << 6) |
	                                              (char2 & 0x3F));
	                break;
	            case 14:
	                /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. */
	                requireRemaining(isByteLen, end - byteOffset, 2);
	                char2 = bytes[byteOffset++];
	                char3 = bytes[byteOffset++];
	                if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
	                    throw new IllegalArgumentException(
	                        "Illegal UTF continuation byte before offset " +
	                        byteOffset);
	                }
	                chars[charOffset++] = (char) (((char1 & 0x0F) << 12) |
	                                              ((char2 & 0x3F) << 6) |
	                                              (char3 & 0x3F));
	                break;
	            default:
	                throw new IllegalArgumentException(
	                    "Illegal UTF lead byte 0x" + Integer.toHexString(char1) +
	                    " at offset " + (byteOffset - 1));
	            }
	        }
	        return byteOffset;
	    }

	    /**
	     * Rejects a multi-byte sequence whose continuation bytes lie beyond the
	     * requested byte range.  Does nothing when the limit is not enforced,
	     * which is the case when the caller's length is measured in characters.
	     */
	    private static void requireRemaining(boolean enforce,
	                                         int available,
	                                         int needed) {
	        if (enforce && (available < needed)) {
	            throw new IndexOutOfBoundsException(
	                "Incomplete UTF sequence at end of data");
	        }
	    }

	    /**
	     * Converts character arrays into byte arrays.
	     *
	     * <p>The destination must have room for the number of bytes reported by
	     * {@link #getByteLength(char[], int, int)} for the same characters.</p>
	     *
	     * @param chars the source character data to convert
	     *
	     * @param charOffset the offset into the character array at which
	     * to start the conversion
	     *
	     * @param bytes the destination array
	     *
	     * @param byteOffset the offset into bytes at which to begin the copy
	     *
	     * @param charLength the length of characters to copy into bytes
	     */
	    public static void charsToBytes(char[] chars, int charOffset,
	                                    byte[] bytes, int byteOffset,
	                                    int charLength) {
	        final int charEnd = charOffset + charLength;
	        for (int i = charOffset; i < charEnd; i++) {
	            final int c = chars[i];
	            if ((c >= 0x0001) && (c <= 0x007F)) {
	                bytes[byteOffset++] = (byte) c;
	            } else if (c > 0x07FF) {
	                bytes[byteOffset++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
	                bytes[byteOffset++] = (byte) (0x80 | ((c >> 6) & 0x3F));
	                bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
	            } else {
	                /* Zero lands here, so it is never written as a zero byte. */
	                bytes[byteOffset++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
	                bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
	            }
	        }
	    }

	    /**
	     * Converts byte arrays into strings.
	     *
	     * @param bytes the source byte data to convert
	     *
	     * @param offset the offset into the byte array at which
	     * to start the conversion
	     *
	     * @param length the number of bytes to be converted.
	     *
	     * @return the string.
	     *
	     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
	     * of the data is not complete.
	     *
	     * @throws IllegalArgumentException if an illegal UTF sequence is
	     * encountered.
	     */
	    public static String bytesToString(byte[] bytes, int offset, int length)
	        throws IllegalArgumentException, IndexOutOfBoundsException {

	        if (length == 0) {
	            return EMPTY_STRING;
	        }
	        /* First pass sizes the buffer, second pass decodes into it. */
	        final char[] chars = new char[getCharLength(bytes, offset, length)];
	        bytesToChars(bytes, offset, chars, 0, length, true);
	        return new String(chars);
	    }

	    /**
	     * Converts strings to byte arrays.
	     *
	     * @param string the string to convert.
	     *
	     * @return the UTF byte array.
	     */
	    public static byte[] stringToBytes(String string) {

	        if (string.length() == 0) {
	            return EMPTY_BYTES;
	        }
	        final char[] chars = string.toCharArray();
	        final byte[] bytes = new byte[getByteLength(chars)];
	        charsToBytes(chars, 0, bytes, 0, chars.length);
	        return bytes;
	    }
	}