| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.tomcat.util.buf; |
| |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.CoderResult; |
| import java.nio.charset.StandardCharsets; |
| |
/**
 * Encodes characters as bytes using UTF-8. Extracted from Apache Harmony with
 * some minor bug fixes applied.
 * <p>
 * Two code paths are provided: a fast path that works directly on the backing
 * arrays when both buffers expose one, and a generic path that uses the
 * buffers' relative get/put operations. Both paths produce identical output
 * and leave the buffer positions immediately after the last fully encoded
 * character.
 * <p>
 * Like every {@link CharsetEncoder}, instances are stateful and are not safe
 * for concurrent use.
 */
public class Utf8Encoder extends CharsetEncoder {

    /**
     * Creates a UTF-8 encoder that assumes an average of 1.1 bytes and a
     * maximum of 4 bytes per input char.
     */
    public Utf8Encoder() {
        super(StandardCharsets.UTF_8, 1.1f, 4.0f);
    }

    /**
     * Encodes as many characters as possible from {@code in} into {@code out}.
     *
     * @param in  the characters to encode; its position is advanced past every
     *            character that was fully encoded
     * @param out the destination; its position is advanced past every byte
     *            written
     *
     * @return {@link CoderResult#UNDERFLOW} if all input was consumed or the
     *         input ends with an unpaired high surrogate,
     *         {@link CoderResult#OVERFLOW} if {@code out} has insufficient
     *         room for the next character, or a malformed-input result of
     *         length 1 if an invalid surrogate sequence was found
     */
    @Override
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        if (in.hasArray() && out.hasArray()) {
            return encodeHasArray(in, out);
        }
        return encodeNotHasArray(in, out);
    }

    /**
     * Fast path used when both buffers are backed by accessible arrays.
     * <p>
     * All indexes used inside the loop are absolute indexes into the backing
     * arrays, i.e. they include {@code arrayOffset()}. This is required for
     * buffers created via {@code slice()} whose position zero does not map to
     * array index zero.
     */
    private CoderResult encodeHasArray(CharBuffer in, ByteBuffer out) {
        final char[] cArr = in.array();
        final byte[] bArr = out.array();
        final int inOffset = in.arrayOffset();
        final int outOffset = out.arrayOffset();
        final int inLimit = inOffset + in.limit();
        final int outLimit = outOffset + out.limit();
        int inPos = inOffset + in.position();
        int outPos = outOffset + out.position();

        CoderResult result = CoderResult.UNDERFLOW;
        while (inPos < inLimit) {
            if (outPos == outLimit) {
                // Input remains but there is no room for even a single byte.
                result = CoderResult.OVERFLOW;
                break;
            }

            final char c = cArr[inPos];

            if (c <= 0x7F) {
                // 1 byte: 0xxxxxxx
                bArr[outPos++] = (byte) c;
                inPos++;
            } else if (c <= 0x7FF) {
                // 2 bytes: 110xxxxx 10xxxxxx
                if (outLimit - outPos < 2) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                bArr[outPos++] = (byte) (0xC0 | (c >> 6));
                bArr[outPos++] = (byte) (0x80 | (c & 0x3F));
                inPos++;
            } else if (Character.isSurrogate(c)) {
                // A pair must not start with a low surrogate. No amount of
                // additional input can fix this so report it immediately.
                if (Character.isLowSurrogate(c)) {
                    result = CoderResult.malformedForLength(1);
                    break;
                }
                // in has to have one char more to complete the pair.
                if (inLimit - inPos < 2) {
                    result = CoderResult.UNDERFLOW;
                    break;
                }
                final char c2 = cArr[inPos + 1];
                // The high surrogate must be followed by a low surrogate
                // (0xDC00-0xDFFF); anything else is malformed.
                if (!Character.isLowSurrogate(c2)) {
                    result = CoderResult.malformedForLength(1);
                    break;
                }
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                if (outLimit - outPos < 4) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                final int n = Character.toCodePoint(c, c2);
                bArr[outPos++] = (byte) (0xF0 | ((n >> 18) & 0x07));
                bArr[outPos++] = (byte) (0x80 | ((n >> 12) & 0x3F));
                bArr[outPos++] = (byte) (0x80 | ((n >> 6) & 0x3F));
                bArr[outPos++] = (byte) (0x80 | (n & 0x3F));
                inPos += 2;
            } else {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                if (outLimit - outPos < 3) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                bArr[outPos++] = (byte) (0xE0 | (c >> 12));
                bArr[outPos++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                bArr[outPos++] = (byte) (0x80 | (c & 0x3F));
                inPos++;
            }
        }

        // Translate the absolute array indexes back into buffer positions.
        in.position(inPos - inOffset);
        out.position(outPos - outOffset);
        return result;
    }

    /**
     * Generic path used when at least one buffer has no accessible backing
     * array (direct or read-only buffers).
     * <p>
     * Characters are read with relative gets, so the input position may
     * temporarily be ahead of the last fully encoded character. The
     * {@code finally} block always rewinds it to {@code pos}, which tracks the
     * first character that has not been encoded yet.
     */
    private CoderResult encodeNotHasArray(CharBuffer in, ByteBuffer out) {
        final int limit = in.limit();
        int outRemaining = out.remaining();
        int pos = in.position();
        try {
            while (pos < limit) {
                if (outRemaining == 0) {
                    return CoderResult.OVERFLOW;
                }

                final char c = in.get();

                if (c <= 0x7F) {
                    // 1 byte: 0xxxxxxx
                    out.put((byte) c);
                    outRemaining--;
                    pos++;
                } else if (c <= 0x7FF) {
                    // 2 bytes: 110xxxxx 10xxxxxx
                    if (outRemaining < 2) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) (0xC0 | (c >> 6)));
                    out.put((byte) (0x80 | (c & 0x3F)));
                    outRemaining -= 2;
                    pos++;
                } else if (Character.isSurrogate(c)) {
                    // A pair must not start with a low surrogate. No amount
                    // of additional input can fix this so report it
                    // immediately.
                    if (Character.isLowSurrogate(c)) {
                        return CoderResult.malformedForLength(1);
                    }
                    // in has to have one char more to complete the pair.
                    if (limit - pos < 2) {
                        return CoderResult.UNDERFLOW;
                    }
                    final char c2 = in.get();
                    // The high surrogate must be followed by a low surrogate
                    // (0xDC00-0xDFFF); anything else is malformed.
                    if (!Character.isLowSurrogate(c2)) {
                        return CoderResult.malformedForLength(1);
                    }
                    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    if (outRemaining < 4) {
                        return CoderResult.OVERFLOW;
                    }
                    final int n = Character.toCodePoint(c, c2);
                    out.put((byte) (0xF0 | ((n >> 18) & 0x07)));
                    out.put((byte) (0x80 | ((n >> 12) & 0x3F)));
                    out.put((byte) (0x80 | ((n >> 6) & 0x3F)));
                    out.put((byte) (0x80 | (n & 0x3F)));
                    outRemaining -= 4;
                    pos += 2;
                } else {
                    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                    if (outRemaining < 3) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) (0xE0 | (c >> 12)));
                    out.put((byte) (0x80 | ((c >> 6) & 0x3F)));
                    out.put((byte) (0x80 | (c & 0x3F)));
                    outRemaining -= 3;
                    pos++;
                }
            }
            return CoderResult.UNDERFLOW;
        } finally {
            // Discard any look-ahead: only fully encoded chars are consumed.
            in.position(pos);
        }
    }
}