flink-libraries/flink-table-common/src/main/java/org/apache/flink/table/runtime/util/StringUtf8Utils.java - flink - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.	See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.	You may obtain a copy of the License at
  *
  *		http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.flink.table.runtime.util;

 import org.apache.flink.core.memory.MemorySegment;

 import java.io.UnsupportedEncodingException;
 import java.util.Arrays;

 /**
  * Utilities for encoding {@link String}s to UTF-8 bytes and decoding UTF-8 bytes back to strings.
  *
  * <p>{@code StringUtf8Utils} follows the implementation of SerializeWriter and IOUtils in Alibaba fastjson.
  * The difference is that {@code StringUtf8Utils} also has to cope with malformed input, just like
  * StringCoding.decode: such input is either replaced inline or handed to the JDK's own UTF-8 codec.
  *
  * <p>Scratch buffers are cached in {@link ThreadLocal}s, so an array returned by
  * {@link #allocateChars(int)} or {@link #allocateBytes(int)} may be returned again by a later call
  * on the same thread.
  */
 public class StringUtf8Utils {

 	/** Size, in chars, of the scratch char buffer cached per thread. */
 	private static final int MAX_CHARS_LENGTH = 1024 * 32;
 	/** Size, in bytes, of the scratch byte buffer cached per thread. */
 	private static final int MAX_BYTES_LENGTH = 1024 * 64;
 	/**
 	 * Upper bound on the number of UTF-8 bytes produced per UTF-16 char. A surrogate pair is two
 	 * chars and encodes to four bytes, every other char encodes to at most three.
 	 */
 	public static final int MAX_BYTES_PER_CHAR = 3;

 	private static final ThreadLocal<char[]> CHARS_LOCAL = new ThreadLocal<>();
 	private static final ThreadLocal<byte[]> BYTES_LOCAL = new ThreadLocal<>();

 	/**
 	 * Returns a scratch {@code char[]} that holds at least {@code length} chars.
 	 *
 	 * <p>If the calling thread has no cached buffer yet and {@code length} fits into
 	 * {@code MAX_CHARS_LENGTH}, a buffer of that fixed size is created and cached. A cached buffer
 	 * is returned whenever it is large enough. In every other case a fresh array of exactly
 	 * {@code length} chars is returned and NOT cached.
 	 *
 	 * <p>The array is not cleared before it is returned and may be longer than requested.
 	 *
 	 * @param length minimum number of chars the caller needs
 	 * @return a char array of at least {@code length} elements
 	 */
 	public static char[] allocateChars(int length) {
 		char[] chars = CHARS_LOCAL.get();

 		if (chars == null) {
 			if (length <= MAX_CHARS_LENGTH) {
 				chars = new char[MAX_CHARS_LENGTH];
 				CHARS_LOCAL.set(chars);
 			} else {
 				chars = new char[length];
 			}
 		} else if (chars.length < length) {
 			// Too big for the cached buffer: hand out a one-off array, keep the cached one as is.
 			chars = new char[length];
 		}

 		return chars;
 	}

 	/**
 	 * Returns a scratch {@code byte[]} that holds at least {@code length} bytes.
 	 *
 	 * <p>If the calling thread has no cached buffer yet and {@code length} fits into
 	 * {@code MAX_BYTES_LENGTH}, a buffer of that fixed size is created and cached. A cached buffer
 	 * is returned whenever it is large enough. In every other case a fresh array of exactly
 	 * {@code length} bytes is returned and NOT cached.
 	 *
 	 * <p>The array is not cleared before it is returned and may be longer than requested.
 	 *
 	 * @param length minimum number of bytes the caller needs
 	 * @return a byte array of at least {@code length} elements
 	 */
 	public static byte[] allocateBytes(int length) {
 		byte[] bytes = BYTES_LOCAL.get();

 		if (bytes == null) {
 			if (length <= MAX_BYTES_LENGTH) {
 				bytes = new byte[MAX_BYTES_LENGTH];
 				BYTES_LOCAL.set(bytes);
 			} else {
 				bytes = new byte[length];
 			}
 		} else if (bytes.length < length) {
 			// Too big for the cached buffer: hand out a one-off array, keep the cached one as is.
 			bytes = new byte[length];
 		}

 		return bytes;
 	}

 	/**
 	 * Encodes {@code str} as UTF-8 and returns a freshly allocated array of exactly the encoded size.
 	 *
 	 * <p>This method must have the same result with JDK's String.getBytes.
 	 *
 	 * @param str string to encode
 	 * @return the UTF-8 bytes of {@code str}
 	 */
 	public static byte[] encodeUTF8(String str) {
 		// Worst case is MAX_BYTES_PER_CHAR bytes per char, so the scratch buffer is always big enough.
 		byte[] bytes = allocateBytes(str.length() * MAX_BYTES_PER_CHAR);
 		int len = encodeUTF8(str, bytes);
 		// The scratch buffer is shared per thread, so hand the caller a private copy.
 		return Arrays.copyOf(bytes, len);
 	}

 	/**
 	 * Encodes {@code str} as UTF-8 into {@code bytes}, starting at index 0.
 	 *
 	 * <p>Well-formed input is encoded by the hand-written loop. A high surrogate that is the very
 	 * last char is written as {@code '?'}. Any other unpaired surrogate makes the method discard
 	 * what it has written so far and delegate the whole string to
 	 * {@link #defaultEncodeUTF8(String, byte[])}.
 	 *
 	 * @param str string to encode
 	 * @param bytes destination buffer; the caller must size it for the encoded form
 	 *     ({@code str.length() * MAX_BYTES_PER_CHAR} is always sufficient)
 	 * @return number of bytes written to {@code bytes}
 	 */
 	public static int encodeUTF8(String str, byte[] bytes) {
 		int offset = 0;
 		int len = str.length();
 		int sl = offset + len;
 		int dp = 0;
 		int dlASCII = dp + Math.min(len, bytes.length);

 		// ASCII only optimized loop
 		while (dp < dlASCII && str.charAt(offset) < '\u0080') {
 			bytes[dp++] = (byte) str.charAt(offset++);
 		}

 		while (offset < sl) {
 			char c = str.charAt(offset++);
 			if (c < 0x80) {
 				// Have at most seven bits
 				bytes[dp++] = (byte) c;
 			} else if (c < 0x800) {
 				// 2 bytes, 11 bits
 				bytes[dp++] = (byte) (0xc0 | (c >> 6));
 				bytes[dp++] = (byte) (0x80 | (c & 0x3f));
 			} else if (Character.isLowSurrogate(c)) {
 				// A low surrogate without a preceding high surrogate is illegal.
 				// The JDK replaces it with '?'; delegate so the result is the same as the JDK's.
 				return defaultEncodeUTF8(str, bytes);
 			} else if (Character.isHighSurrogate(c)) {
 				if (offset >= sl) {
 					// High surrogate is the last char of the string: nothing to pair it with.
 					bytes[dp++] = (byte) '?';
 				} else {
 					char d = str.charAt(offset);
 					if (!Character.isLowSurrogate(d)) {
 						// High surrogate followed by something else is illegal.
 						// The JDK replaces it with '?'; delegate so the result is the same as the JDK's.
 						return defaultEncodeUTF8(str, bytes);
 					}
 					// 4 bytes, 21 bits
 					int uc = Character.toCodePoint(c, d);
 					bytes[dp++] = (byte) (0xf0 | ((uc >> 18)));
 					bytes[dp++] = (byte) (0x80 | ((uc >> 12) & 0x3f));
 					bytes[dp++] = (byte) (0x80 | ((uc >> 6) & 0x3f));
 					bytes[dp++] = (byte) (0x80 | (uc & 0x3f));
 					offset++; // 2 chars
 				}
 			} else {
 				// 3 bytes, 16 bits
 				bytes[dp++] = (byte) (0xe0 | ((c >> 12)));
 				bytes[dp++] = (byte) (0x80 | ((c >> 6) & 0x3f));
 				bytes[dp++] = (byte) (0x80 | (c & 0x3f));
 			}
 		}
 		return dp;
 	}

 	/**
 	 * Encodes {@code str} with the JDK's UTF-8 encoder and copies the result to the start of
 	 * {@code bytes}.
 	 *
 	 * @param str string to encode
 	 * @param bytes destination buffer, written from index 0
 	 * @return number of bytes copied into {@code bytes}
 	 * @throws IndexOutOfBoundsException if {@code bytes} is shorter than the encoded form
 	 * @throws RuntimeException if the JVM reports that UTF-8 is unsupported
 	 */
 	public static int defaultEncodeUTF8(String str, byte[] bytes) {
 		try {
 			byte[] buffer = str.getBytes("UTF-8");
 			System.arraycopy(buffer, 0, bytes, 0, buffer.length);
 			return buffer.length;
 		} catch (UnsupportedEncodingException e) {
 			throw new RuntimeException("encodeUTF8 error", e);
 		}
 	}

 	/**
 	 * Decodes {@code byteLen} UTF-8 bytes of {@code input}, starting at {@code offset}, into a string.
 	 *
 	 * <p>The strict hand-written decoder is tried first; if it rejects the input, the bytes are
 	 * decoded again by {@link #defaultDecodeUTF8(byte[], int, int)}.
 	 *
 	 * @param input array holding the encoded bytes
 	 * @param offset index of the first byte to decode
 	 * @param byteLen number of bytes to decode
 	 * @return the decoded string
 	 */
 	public static String decodeUTF8(byte[] input, int offset, int byteLen) {
 		// A UTF-8 sequence never decodes to more chars than it has bytes.
 		char[] chars = allocateChars(byteLen);
 		int len = decodeUTF8Strict(input, offset, byteLen, chars);
 		if (len < 0) {
 			return defaultDecodeUTF8(input, offset, byteLen);
 		}
 		return new String(chars, 0, len);
 	}

 	/**
 	 * Decodes well-formed UTF-8 from a byte array into {@code da}, refusing anything malformed.
 	 *
 	 * <p>Returns {@code -1} on an invalid lead byte, a truncated sequence, a missing continuation
 	 * byte, an over-long 2- or 3-byte form, a 3-byte encoded surrogate, or a 4-byte sequence that
 	 * does not yield a supplementary code point. {@code da} may be partially written in that case.
 	 *
 	 * @param sa source bytes
 	 * @param sp index of the first byte to decode
 	 * @param len number of bytes to decode
 	 * @param da destination chars, written from index 0; needs room for {@code len} chars
 	 * @return number of chars written to {@code da}, or {@code -1} if the input is malformed
 	 */
 	public static int decodeUTF8Strict(byte[] sa, int sp, int len, char[] da) {
 		final int sl = sp + len;
 		int dp = 0;
 		int dlASCII = Math.min(len, da.length);

 		// ASCII only optimized loop
 		while (dp < dlASCII && sa[sp] >= 0) {
 			da[dp++] = (char) sa[sp++];
 		}

 		while (sp < sl) {
 			int b1 = sa[sp++];
 			if (b1 >= 0) {
 				// 1 byte, 7 bits: 0xxxxxxx
 				da[dp++] = (char) b1;
 			} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
 				// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 				// (the second test rejects the over-long lead bytes 0xC0 and 0xC1)
 				if (sp < sl) {
 					int b2 = sa[sp++];
 					if ((b2 & 0xc0) != 0x80) { // isNotContinuation(b2)
 						return -1;
 					} else {
 						da[dp++] = (char) (((b1 << 6) ^ b2) ^ (((byte) 0xC0 << 6) ^ ((byte) 0x80)));
 					}
 					continue;
 				}
 				return -1;
 			} else if ((b1 >> 4) == -2) {
 				// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 				if (sp + 1 < sl) {
 					int b2 = sa[sp++];
 					int b3 = sa[sp++];
 					if ((b1 == (byte) 0xe0 && (b2 & 0xe0) == 0x80)
 							|| (b2 & 0xc0) != 0x80
 							|| (b3 & 0xc0) != 0x80) { // isMalformed3(b1, b2, b3)
 						return -1;
 					} else {
 						char c = (char) ((b1 << 12) ^ (b2 << 6) ^ (b3 ^
 								(((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80))));
 						if (Character.isSurrogate(c)) {
 							return -1;
 						} else {
 							da[dp++] = c;
 						}
 					}
 					continue;
 				}
 				return -1;
 			} else if ((b1 >> 3) == -2) {
 				// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 				if (sp + 2 < sl) {
 					int b2 = sa[sp++];
 					int b3 = sa[sp++];
 					int b4 = sa[sp++];
 					int uc = ((b1 << 18) ^
 							(b2 << 12) ^
 							(b3 << 6) ^
 							(b4 ^ (((byte) 0xF0 << 18) ^ ((byte) 0x80 << 12) ^
 									((byte) 0x80 << 6) ^ ((byte) 0x80))));
 					// isMalformed4 and shortest form check
 					if (((b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || (b4 & 0xc0) != 0x80)
 							|| !Character.isSupplementaryCodePoint(uc)) {
 						return -1;
 					} else {
 						da[dp++] = Character.highSurrogate(uc);
 						da[dp++] = Character.lowSurrogate(uc);
 					}
 					continue;
 				}
 				return -1;
 			} else {
 				return -1;
 			}
 		}
 		return dp;
 	}

 	/**
 	 * Decodes {@code byteLen} UTF-8 bytes of {@code input}, starting at {@code offset}, into a string.
 	 *
 	 * <p>The strict hand-written decoder is tried first; if it rejects the input, the bytes are
 	 * copied into a scratch array and decoded by {@link #defaultDecodeUTF8(byte[], int, int)}.
 	 *
 	 * @param input memory segment holding the encoded bytes
 	 * @param offset position of the first byte to decode within the segment
 	 * @param byteLen number of bytes to decode
 	 * @return the decoded string
 	 */
 	public static String decodeUTF8(MemorySegment input, int offset, int byteLen) {
 		// A UTF-8 sequence never decodes to more chars than it has bytes.
 		char[] chars = allocateChars(byteLen);
 		int len = decodeUTF8Strict(input, offset, byteLen, chars);
 		if (len < 0) {
 			byte[] bytes = allocateBytes(byteLen);
 			input.get(offset, bytes, 0, byteLen);
 			return defaultDecodeUTF8(bytes, 0, byteLen);
 		}
 		return new String(chars, 0, len);
 	}

 	/**
 	 * Decodes well-formed UTF-8 from a memory segment into {@code da}, refusing anything malformed.
 	 *
 	 * <p>Same algorithm as {@link #decodeUTF8Strict(byte[], int, int, char[])}, reading bytes with
 	 * {@code segment.get(index)} instead of array access.
 	 *
 	 * @param segment source memory segment
 	 * @param sp position of the first byte to decode within the segment
 	 * @param len number of bytes to decode
 	 * @param da destination chars, written from index 0; needs room for {@code len} chars
 	 * @return number of chars written to {@code da}, or {@code -1} if the input is malformed
 	 */
 	public static int decodeUTF8Strict(MemorySegment segment, int sp, int len, char[] da) {
 		final int sl = sp + len;
 		int dp = 0;
 		int dlASCII = Math.min(len, da.length);

 		// ASCII only optimized loop
 		while (dp < dlASCII && segment.get(sp) >= 0) {
 			da[dp++] = (char) segment.get(sp++);
 		}

 		while (sp < sl) {
 			int b1 = segment.get(sp++);
 			if (b1 >= 0) {
 				// 1 byte, 7 bits: 0xxxxxxx
 				da[dp++] = (char) b1;
 			} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
 				// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 				// (the second test rejects the over-long lead bytes 0xC0 and 0xC1)
 				if (sp < sl) {
 					int b2 = segment.get(sp++);
 					if ((b2 & 0xc0) != 0x80) { // isNotContinuation(b2)
 						return -1;
 					} else {
 						da[dp++] = (char) (((b1 << 6) ^ b2) ^ (((byte) 0xC0 << 6) ^ ((byte) 0x80)));
 					}
 					continue;
 				}
 				return -1;
 			} else if ((b1 >> 4) == -2) {
 				// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 				if (sp + 1 < sl) {
 					int b2 = segment.get(sp++);
 					int b3 = segment.get(sp++);
 					if ((b1 == (byte) 0xe0 && (b2 & 0xe0) == 0x80)
 							|| (b2 & 0xc0) != 0x80
 							|| (b3 & 0xc0) != 0x80) { // isMalformed3(b1, b2, b3)
 						return -1;
 					} else {
 						char c = (char) ((b1 << 12) ^ (b2 << 6) ^ (b3 ^
 								(((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80))));
 						if (Character.isSurrogate(c)) {
 							return -1;
 						} else {
 							da[dp++] = c;
 						}
 					}
 					continue;
 				}
 				return -1;
 			} else if ((b1 >> 3) == -2) {
 				// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 				if (sp + 2 < sl) {
 					int b2 = segment.get(sp++);
 					int b3 = segment.get(sp++);
 					int b4 = segment.get(sp++);
 					int uc = ((b1 << 18) ^
 							(b2 << 12) ^
 							(b3 << 6) ^
 							(b4 ^ (((byte) 0xF0 << 18) ^ ((byte) 0x80 << 12) ^
 									((byte) 0x80 << 6) ^ ((byte) 0x80))));
 					// isMalformed4 and shortest form check
 					if (((b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || (b4 & 0xc0) != 0x80)
 							|| !Character.isSupplementaryCodePoint(uc)) {
 						return -1;
 					} else {
 						da[dp++] = Character.highSurrogate(uc);
 						da[dp++] = Character.lowSurrogate(uc);
 					}
 					continue;
 				}
 				return -1;
 			} else {
 				return -1;
 			}
 		}
 		return dp;
 	}

 	/**
 	 * Decodes {@code len} bytes of {@code bytes}, starting at {@code offset}, with the JDK's UTF-8
 	 * decoder.
 	 *
 	 * @param bytes array holding the encoded bytes
 	 * @param offset index of the first byte to decode
 	 * @param len number of bytes to decode
 	 * @return the decoded string
 	 * @throws RuntimeException if the JVM reports that UTF-8 is unsupported
 	 */
 	public static String defaultDecodeUTF8(byte[] bytes, int offset, int len) {
 		try {
 			return new String(bytes, offset, len, "UTF-8");
 		} catch (UnsupportedEncodingException e) {
 			// This is the decode path; the message used to say "encodeUTF8".
 			throw new RuntimeException("decodeUTF8 error", e);
 		}
 	}
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.flink.table.runtime.util;

	import org.apache.flink.core.memory.MemorySegment;

	import java.io.UnsupportedEncodingException;
	import java.util.Arrays;

	/**
	* String utf-8 utils.
	*
	* <p>{@code StringUtf8Utils} refers to the implementation from SerializeWriter and IOUtils of Alibaba fastjson.
	* The difference is that StringUtf8Utils need to handle the wrong code, just like StringCoding.decode.
	*/
	public class StringUtf8Utils {

	private static final int MAX_CHARS_LENGTH = 1024 * 32;
	private static final int MAX_BYTES_LENGTH = 1024 * 64;
	public static final int MAX_BYTES_PER_CHAR = 3;

	private static final ThreadLocal<char[]> CHARS_LOCAL = new ThreadLocal<>();
	private static final ThreadLocal<byte[]> BYTES_LOCAL = new ThreadLocal<>();

	public static char[] allocateChars(int length) {
	char[] chars = CHARS_LOCAL.get();

	if (chars == null) {
	if (length <= MAX_CHARS_LENGTH) {
	chars = new char[MAX_CHARS_LENGTH];
	CHARS_LOCAL.set(chars);
	} else {
	chars = new char[length];
	}
	} else if (chars.length < length) {
	chars = new char[length];
	}

	return chars;
	}

	public static byte[] allocateBytes(int length) {
	byte[] bytes = BYTES_LOCAL.get();

	if (bytes == null) {
	if (length <= MAX_BYTES_LENGTH) {
	bytes = new byte[MAX_BYTES_LENGTH];
	BYTES_LOCAL.set(bytes);
	} else {
	bytes = new byte[length];
	}
	} else if (bytes.length < length) {
	bytes = new byte[length];
	}

	return bytes;
	}

	/**
	* This method must have the same result with JDK's String.getBytes.
	*/
	public static byte[] encodeUTF8(String str) {
	byte[] bytes = allocateBytes(str.length() * MAX_BYTES_PER_CHAR);
	int len = encodeUTF8(str, bytes);
	return Arrays.copyOf(bytes, len);
	}

	public static int encodeUTF8(String str, byte[] bytes) {
	int offset = 0;
	int len = str.length();
	int sl = offset + len;
	int dp = 0;
	int dlASCII = dp + Math.min(len, bytes.length);

	// ASCII only optimized loop
	while (dp < dlASCII && str.charAt(offset) < '\u0080') {
	bytes[dp++] = (byte) str.charAt(offset++);
	}

	while (offset < sl) {
	char c = str.charAt(offset++);
	if (c < 0x80) {
	// Have at most seven bits
	bytes[dp++] = (byte) c;
	} else if (c < 0x800) {
	// 2 bytes, 11 bits
	bytes[dp++] = (byte) (0xc0 \| (c >> 6));
	bytes[dp++] = (byte) (0x80 \| (c & 0x3f));
	} else if (Character.isSurrogate(c)) {
	final int uc;
	int ip = offset - 1;
	if (Character.isHighSurrogate(c)) {
	if (sl - ip < 2) {
	uc = -1;
	} else {
	char d = str.charAt(ip + 1);
	if (Character.isLowSurrogate(d)) {
	uc = Character.toCodePoint(c, d);
	} else {
	// for some illegal character
	// the jdk will ignore the origin character and cast it to '?'
	// this acts the same with jdk
	return defaultEncodeUTF8(str, bytes);
	}
	}
	} else {
	if (Character.isLowSurrogate(c)) {
	// for some illegal character
	// the jdk will ignore the origin character and cast it to '?'
	// this acts the same with jdk
	return defaultEncodeUTF8(str, bytes);
	} else {
	uc = c;
	}
	}

	if (uc < 0) {
	bytes[dp++] = (byte) '?';
	} else {
	bytes[dp++] = (byte) (0xf0 \| ((uc >> 18)));
	bytes[dp++] = (byte) (0x80 \| ((uc >> 12) & 0x3f));
	bytes[dp++] = (byte) (0x80 \| ((uc >> 6) & 0x3f));
	bytes[dp++] = (byte) (0x80 \| (uc & 0x3f));
	offset++; // 2 chars
	}
	} else {
	// 3 bytes, 16 bits
	bytes[dp++] = (byte) (0xe0 \| ((c >> 12)));
	bytes[dp++] = (byte) (0x80 \| ((c >> 6) & 0x3f));
	bytes[dp++] = (byte) (0x80 \| (c & 0x3f));
	}
	}
	return dp;
	}

	public static int defaultEncodeUTF8(String str, byte[] bytes) {
	try {
	byte[] buffer = str.getBytes("UTF-8");
	System.arraycopy(buffer, 0, bytes, 0, buffer.length);
	return buffer.length;
	} catch (UnsupportedEncodingException e) {
	throw new RuntimeException("encodeUTF8 error", e);
	}
	}

	public static String decodeUTF8(byte[] input, int offset, int byteLen) {
	char[] chars = allocateChars(byteLen);
	int len = decodeUTF8Strict(input, offset, byteLen, chars);
	if (len < 0) {
	return defaultDecodeUTF8(input, offset, byteLen);
	}
	return new String(chars, 0, len);
	}

	public static int decodeUTF8Strict(byte[] sa, int sp, int len, char[] da) {
	final int sl = sp + len;
	int dp = 0;
	int dlASCII = Math.min(len, da.length);

	// ASCII only optimized loop
	while (dp < dlASCII && sa[sp] >= 0) {
	da[dp++] = (char) sa[sp++];
	}

	while (sp < sl) {
	int b1 = sa[sp++];
	if (b1 >= 0) {
	// 1 byte, 7 bits: 0xxxxxxx
	da[dp++] = (char) b1;
	} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
	// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
	if (sp < sl) {
	int b2 = sa[sp++];
	if ((b2 & 0xc0) != 0x80) { // isNotContinuation(b2)
	return -1;
	} else {
	da[dp++] = (char) (((b1 << 6) ^ b2) ^ (((byte) 0xC0 << 6) ^ ((byte) 0x80)));
	}
	continue;
	}
	return -1;
	} else if ((b1 >> 4) == -2) {
	// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
	if (sp + 1 < sl) {
	int b2 = sa[sp++];
	int b3 = sa[sp++];
	if ((b1 == (byte) 0xe0 && (b2 & 0xe0) == 0x80)
	\|\| (b2 & 0xc0) != 0x80
	\|\| (b3 & 0xc0) != 0x80) { // isMalformed3(b1, b2, b3)
	return -1;
	} else {
	char c = (char) ((b1 << 12) ^ (b2 << 6) ^ (b3 ^
	(((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80))));
	if (Character.isSurrogate(c)) {
	return -1;
	} else {
	da[dp++] = c;
	}
	}
	continue;
	}
	return -1;
	} else if ((b1 >> 3) == -2) {
	// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	if (sp + 2 < sl) {
	int b2 = sa[sp++];
	int b3 = sa[sp++];
	int b4 = sa[sp++];
	int uc = ((b1 << 18) ^
	(b2 << 12) ^
	(b3 << 6) ^
	(b4 ^ (((byte) 0xF0 << 18) ^ ((byte) 0x80 << 12) ^
	((byte) 0x80 << 6) ^ ((byte) 0x80))));
	// isMalformed4 and shortest form check
	if (((b2 & 0xc0) != 0x80 \|\| (b3 & 0xc0) != 0x80 \|\| (b4 & 0xc0) != 0x80)
	\|\| !Character.isSupplementaryCodePoint(uc)) {
	return -1;
	} else {
	da[dp++] = Character.highSurrogate(uc);
	da[dp++] = Character.lowSurrogate(uc);
	}
	continue;
	}
	return -1;
	} else {
	return -1;
	}
	}
	return dp;
	}

	public static String decodeUTF8(MemorySegment input, int offset, int byteLen) {
	char[] chars = allocateChars(byteLen);
	int len = decodeUTF8Strict(input, offset, byteLen, chars);
	if (len < 0) {
	byte[] bytes = allocateBytes(byteLen);
	input.get(offset, bytes, 0, byteLen);
	return defaultDecodeUTF8(bytes, 0, byteLen);
	}
	return new String(chars, 0, len);
	}

	public static int decodeUTF8Strict(MemorySegment segment, int sp, int len, char[] da) {
	final int sl = sp + len;
	int dp = 0;
	int dlASCII = Math.min(len, da.length);

	// ASCII only optimized loop
	while (dp < dlASCII && segment.get(sp) >= 0) {
	da[dp++] = (char) segment.get(sp++);
	}

	while (sp < sl) {
	int b1 = segment.get(sp++);
	if (b1 >= 0) {
	// 1 byte, 7 bits: 0xxxxxxx
	da[dp++] = (char) b1;
	} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
	// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
	if (sp < sl) {
	int b2 = segment.get(sp++);
	if ((b2 & 0xc0) != 0x80) { // isNotContinuation(b2)
	return -1;
	} else {
	da[dp++] = (char) (((b1 << 6) ^ b2) ^ (((byte) 0xC0 << 6) ^ ((byte) 0x80)));
	}
	continue;
	}
	return -1;
	} else if ((b1 >> 4) == -2) {
	// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
	if (sp + 1 < sl) {
	int b2 = segment.get(sp++);
	int b3 = segment.get(sp++);
	if ((b1 == (byte) 0xe0 && (b2 & 0xe0) == 0x80)
	\|\| (b2 & 0xc0) != 0x80
	\|\| (b3 & 0xc0) != 0x80) { // isMalformed3(b1, b2, b3)
	return -1;
	} else {
	char c = (char) ((b1 << 12) ^ (b2 << 6) ^ (b3 ^
	(((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80))));
	if (Character.isSurrogate(c)) {
	return -1;
	} else {
	da[dp++] = c;
	}
	}
	continue;
	}
	return -1;
	} else if ((b1 >> 3) == -2) {
	// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	if (sp + 2 < sl) {
	int b2 = segment.get(sp++);
	int b3 = segment.get(sp++);
	int b4 = segment.get(sp++);
	int uc = ((b1 << 18) ^
	(b2 << 12) ^
	(b3 << 6) ^
	(b4 ^ (((byte) 0xF0 << 18) ^ ((byte) 0x80 << 12) ^
	((byte) 0x80 << 6) ^ ((byte) 0x80))));
	// isMalformed4 and shortest form check
	if (((b2 & 0xc0) != 0x80 \|\| (b3 & 0xc0) != 0x80 \|\| (b4 & 0xc0) != 0x80)
	\|\| !Character.isSupplementaryCodePoint(uc)) {
	return -1;
	} else {
	da[dp++] = Character.highSurrogate(uc);
	da[dp++] = Character.lowSurrogate(uc);
	}
	continue;
	}
	return -1;
	} else {
	return -1;
	}
	}
	return dp;
	}

	public static String defaultDecodeUTF8(byte[] bytes, int offset, int len) {
	try {
	return new String(bytes, offset, len, "UTF-8");
	} catch (UnsupportedEncodingException e) {
	throw new RuntimeException("encodeUTF8 error", e);
	}
	}
	}