java/org/apache/tomcat/util/buf/Utf8Encoder.java - tomcat80 - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.tomcat.util.buf;

 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.StandardCharsets;

 /**
  * Encodes characters as bytes using UTF-8. Extracted from Apache Harmony with
  * some minor bug fixes applied.
  * <p>
  * Array-backed buffers are encoded directly against their backing arrays
  * (taking {@code arrayOffset()} into account so that sliced buffers work);
  * all other buffers are encoded through relative {@code get}/{@code put}
  * calls. A high surrogate that is not followed by a low surrogate, or a low
  * surrogate that starts a pair, is reported as malformed input of length 1.
  */
 public class Utf8Encoder extends CharsetEncoder {

     /**
      * Creates a UTF-8 encoder advertising an average of 1.1 and a maximum of
      * 4 bytes per input character.
      */
     public Utf8Encoder() {
         super(StandardCharsets.UTF_8, 1.1f, 4.0f);
     }

     /**
      * Encodes as many characters as possible from {@code in} into {@code out},
      * choosing the direct-array implementation when both buffers expose an
      * accessible backing array.
      *
      * @param in  the characters to encode
      * @param out the buffer receiving the UTF-8 bytes
      * @return {@link CoderResult#UNDERFLOW}, {@link CoderResult#OVERFLOW} or
      *         a malformed-input result of length 1
      */
     @Override
     protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
         if (in.hasArray() && out.hasArray()) {
             return encodeHasArray(in, out);
         }
         return encodeNotHasArray(in, out);
     }

     /**
      * Encodes using the buffers' backing arrays. Buffer positions are only
      * written back when the method returns.
      *
      * @param in  array-backed source of characters
      * @param out array-backed destination for bytes
      * @return the result of the encoding step
      */
     private CoderResult encodeHasArray(CharBuffer in, ByteBuffer out) {
         final char[] cArr = in.array();
         final byte[] bArr = out.array();
         // Buffer positions are relative to arrayOffset(); a slice of a heap
         // buffer has a non-zero offset, so it must be added on every array
         // access.
         final int inOffset = in.arrayOffset();
         final int outOffset = out.arrayOffset();
         final int limit = in.limit();
         int outRemaining = out.remaining();
         int outPos = out.position();
         int x = in.position();

         for (; x < limit; x++) {
             int jchar = (cArr[inOffset + x] & 0xFFFF);

             if (jchar <= 0x7F) {
                 if (outRemaining < 1) {
                     return stop(in, x, out, outPos, CoderResult.OVERFLOW);
                 }
                 bArr[outOffset + outPos++] = (byte) (jchar & 0xFF);
                 outRemaining--;

             } else if (jchar <= 0x7FF) {
                 if (outRemaining < 2) {
                     return stop(in, x, out, outPos, CoderResult.OVERFLOW);
                 }
                 bArr[outOffset + outPos++] = (byte) (0xC0 + ((jchar >> 6) & 0x1F));
                 bArr[outOffset + outPos++] = (byte) (0x80 + (jchar & 0x3F));
                 outRemaining -= 2;

             } else if (jchar >= 0xD800 && jchar <= 0xDFFF) {
                 // A surrogate needs a second char; wait for more input.
                 if (limit <= x + 1) {
                     return stop(in, x, out, outPos, CoderResult.UNDERFLOW);
                 }
                 if (outRemaining < 4) {
                     return stop(in, x, out, outPos, CoderResult.OVERFLOW);
                 }
                 // The surrogate pair starts with a low-surrogate.
                 if (jchar >= 0xDC00) {
                     return stop(in, x, out, outPos,
                             CoderResult.malformedForLength(1));
                 }

                 int jchar2 = cArr[inOffset + x + 1] & 0xFFFF;

                 // The second char must be a low-surrogate (0xDC00-0xDFFF).
                 // Checking only the lower bound would accept 0xE000-0xFFFF
                 // and emit a bogus four byte sequence.
                 if (jchar2 < 0xDC00 || jchar2 > 0xDFFF) {
                     return stop(in, x, out, outPos,
                             CoderResult.malformedForLength(1));
                 }

                 // Note, the Unicode scalar value n is defined
                 // as follows:
                 // n = (jchar-0xD800)*0x400+(jchar2-0xDC00)+0x10000
                 // Where jchar is a high-surrogate,
                 // jchar2 is a low-surrogate.
                 int n = (jchar << 10) + jchar2 + 0xFCA02400;

                 bArr[outOffset + outPos++] = (byte) (0xF0 + ((n >> 18) & 0x07));
                 bArr[outOffset + outPos++] = (byte) (0x80 + ((n >> 12) & 0x3F));
                 bArr[outOffset + outPos++] = (byte) (0x80 + ((n >> 6) & 0x3F));
                 bArr[outOffset + outPos++] = (byte) (0x80 + (n & 0x3F));
                 outRemaining -= 4;
                 // Skip the low-surrogate that has just been consumed.
                 x++;

             } else {
                 if (outRemaining < 3) {
                     return stop(in, x, out, outPos, CoderResult.OVERFLOW);
                 }
                 bArr[outOffset + outPos++] = (byte) (0xE0 + ((jchar >> 12) & 0x0F));
                 bArr[outOffset + outPos++] = (byte) (0x80 + ((jchar >> 6) & 0x3F));
                 bArr[outOffset + outPos++] = (byte) (0x80 + (jchar & 0x3F));
                 outRemaining -= 3;
             }

             if (outRemaining == 0) {
                 // If both input and output are exhausted, return UNDERFLOW
                 CoderResult result = (x + 1 == limit)
                         ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
                 return stop(in, x + 1, out, outPos, result);
             }
         }
         return stop(in, x, out, outPos, CoderResult.UNDERFLOW);
     }

     /**
      * Publishes the given positions to both buffers and hands back
      * {@code result}, so every exit of the array loop reads as one statement.
      */
     private static CoderResult stop(CharBuffer in, int inPos, ByteBuffer out,
             int outPos, CoderResult result) {
         in.position(inPos);
         out.position(outPos);
         return result;
     }

     /**
      * Encodes using relative {@code get}/{@code put} calls. The input position
      * is restored in the {@code finally} block to the start of the first
      * character that was not completely encoded.
      *
      * @param in  source of characters
      * @param out destination for bytes
      * @return the result of the encoding step
      */
     private CoderResult encodeNotHasArray(CharBuffer in, ByteBuffer out) {
         int outRemaining = out.remaining();
         int pos = in.position();
         int limit = in.limit();
         try {
             while (pos < limit) {
                 if (outRemaining == 0) {
                     return CoderResult.OVERFLOW;
                 }

                 int jchar = (in.get() & 0xFFFF);

                 if (jchar <= 0x7F) {
                     // At least one byte is free, see the check above.
                     out.put((byte) jchar);
                     outRemaining--;

                 } else if (jchar <= 0x7FF) {
                     if (outRemaining < 2) {
                         return CoderResult.OVERFLOW;
                     }
                     out.put((byte) (0xC0 + ((jchar >> 6) & 0x1F)));
                     out.put((byte) (0x80 + (jchar & 0x3F)));
                     outRemaining -= 2;

                 } else if (jchar >= 0xD800 && jchar <= 0xDFFF) {
                     // A surrogate needs a second char; wait for more input.
                     if (limit <= pos + 1) {
                         return CoderResult.UNDERFLOW;
                     }
                     if (outRemaining < 4) {
                         return CoderResult.OVERFLOW;
                     }
                     // The surrogate pair starts with a low-surrogate.
                     if (jchar >= 0xDC00) {
                         return CoderResult.malformedForLength(1);
                     }

                     int jchar2 = (in.get() & 0xFFFF);

                     // The second char must be a low-surrogate (0xDC00-0xDFFF).
                     if (jchar2 < 0xDC00 || jchar2 > 0xDFFF) {
                         return CoderResult.malformedForLength(1);
                     }

                     // Note, the Unicode scalar value n is defined
                     // as follows:
                     // n = (jchar-0xD800)*0x400+(jchar2-0xDC00)+0x10000
                     // Where jchar is a high-surrogate,
                     // jchar2 is a low-surrogate.
                     int n = (jchar << 10) + jchar2 + 0xFCA02400;

                     out.put((byte) (0xF0 + ((n >> 18) & 0x07)));
                     out.put((byte) (0x80 + ((n >> 12) & 0x3F)));
                     out.put((byte) (0x80 + ((n >> 6) & 0x3F)));
                     out.put((byte) (0x80 + (n & 0x3F)));
                     outRemaining -= 4;
                     // Account for the low-surrogate as well.
                     pos++;

                 } else {
                     if (outRemaining < 3) {
                         return CoderResult.OVERFLOW;
                     }
                     out.put((byte) (0xE0 + ((jchar >> 12) & 0x0F)));
                     out.put((byte) (0x80 + ((jchar >> 6) & 0x3F)));
                     out.put((byte) (0x80 + (jchar & 0x3F)));
                     outRemaining -= 3;
                 }
                 pos++;
             }
         } finally {
             // Rewind any char that was read but not encoded.
             in.position(pos);
         }
         return CoderResult.UNDERFLOW;
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.tomcat.util.buf;

	import java.nio.ByteBuffer;
	import java.nio.CharBuffer;
	import java.nio.charset.CharsetEncoder;
	import java.nio.charset.CoderResult;
	import java.nio.charset.StandardCharsets;

	/**
	* Encodes characters as bytes using UTF-8. Extracted from Apache Harmony with
	* some minor bug fixes applied.
	*/
	public class Utf8Encoder extends CharsetEncoder {

	public Utf8Encoder() {
	super(StandardCharsets.UTF_8, 1.1f, 4.0f);
	}

	@Override
	protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
	if (in.hasArray() && out.hasArray()) {
	return encodeHasArray(in, out);
	}
	return encodeNotHasArray(in, out);
	}

	private CoderResult encodeHasArray(CharBuffer in, ByteBuffer out) {
	int outRemaining = out.remaining();
	int pos = in.position();
	int limit = in.limit();
	byte[] bArr;
	char[] cArr;
	int x = pos;
	bArr = out.array();
	cArr = in.array();
	int outPos = out.position();
	int rem = in.remaining();
	for (x = pos; x < pos + rem; x++) {
	int jchar = (cArr[x] & 0xFFFF);

	if (jchar <= 0x7F) {
	if (outRemaining < 1) {
	in.position(x);
	out.position(outPos);
	return CoderResult.OVERFLOW;
	}
	bArr[outPos++] = (byte) (jchar & 0xFF);
	outRemaining--;
	} else if (jchar <= 0x7FF) {

	if (outRemaining < 2) {
	in.position(x);
	out.position(outPos);
	return CoderResult.OVERFLOW;
	}
	bArr[outPos++] = (byte) (0xC0 + ((jchar >> 6) & 0x1F));
	bArr[outPos++] = (byte) (0x80 + (jchar & 0x3F));
	outRemaining -= 2;

	} else if (jchar >= 0xD800 && jchar <= 0xDFFF) {

	// in has to have one byte more.
	if (limit <= x + 1) {
	in.position(x);
	out.position(outPos);
	return CoderResult.UNDERFLOW;
	}

	if (outRemaining < 4) {
	in.position(x);
	out.position(outPos);
	return CoderResult.OVERFLOW;
	}

	// The surrogate pair starts with a low-surrogate.
	if (jchar >= 0xDC00) {
	in.position(x);
	out.position(outPos);
	return CoderResult.malformedForLength(1);
	}

	int jchar2 = cArr[x + 1] & 0xFFFF;

	// The surrogate pair ends with a high-surrogate.
	if (jchar2 < 0xDC00) {
	in.position(x);
	out.position(outPos);
	return CoderResult.malformedForLength(1);
	}

	// Note, the Unicode scalar value n is defined
	// as follows:
	// n = (jchar-0xD800)*0x400+(jchar2-0xDC00)+0x10000
	// Where jchar is a high-surrogate,
	// jchar2 is a low-surrogate.
	int n = (jchar << 10) + jchar2 + 0xFCA02400;

	bArr[outPos++] = (byte) (0xF0 + ((n >> 18) & 0x07));
	bArr[outPos++] = (byte) (0x80 + ((n >> 12) & 0x3F));
	bArr[outPos++] = (byte) (0x80 + ((n >> 6) & 0x3F));
	bArr[outPos++] = (byte) (0x80 + (n & 0x3F));
	outRemaining -= 4;
	x++;

	} else {

	if (outRemaining < 3) {
	in.position(x);
	out.position(outPos);
	return CoderResult.OVERFLOW;
	}
	bArr[outPos++] = (byte) (0xE0 + ((jchar >> 12) & 0x0F));
	bArr[outPos++] = (byte) (0x80 + ((jchar >> 6) & 0x3F));
	bArr[outPos++] = (byte) (0x80 + (jchar & 0x3F));
	outRemaining -= 3;
	}
	if (outRemaining == 0) {
	in.position(x + 1);
	out.position(outPos);
	// If both input and output are exhausted, return UNDERFLOW
	if (x + 1 == limit) {
	return CoderResult.UNDERFLOW;
	} else {
	return CoderResult.OVERFLOW;
	}
	}

	}
	if (rem != 0) {
	in.position(x);
	out.position(outPos);
	}
	return CoderResult.UNDERFLOW;
	}

	private CoderResult encodeNotHasArray(CharBuffer in, ByteBuffer out) {
	int outRemaining = out.remaining();
	int pos = in.position();
	int limit = in.limit();
	try {
	while (pos < limit) {
	if (outRemaining == 0) {
	return CoderResult.OVERFLOW;
	}

	int jchar = (in.get() & 0xFFFF);

	if (jchar <= 0x7F) {

	if (outRemaining < 1) {
	return CoderResult.OVERFLOW;
	}
	out.put((byte) jchar);
	outRemaining--;

	} else if (jchar <= 0x7FF) {

	if (outRemaining < 2) {
	return CoderResult.OVERFLOW;
	}
	out.put((byte) (0xC0 + ((jchar >> 6) & 0x1F)));
	out.put((byte) (0x80 + (jchar & 0x3F)));
	outRemaining -= 2;

	} else if (jchar >= 0xD800 && jchar <= 0xDFFF) {

	// in has to have one byte more.
	if (limit <= pos + 1) {
	return CoderResult.UNDERFLOW;
	}

	if (outRemaining < 4) {
	return CoderResult.OVERFLOW;
	}

	// The surrogate pair starts with a low-surrogate.
	if (jchar >= 0xDC00) {
	return CoderResult.malformedForLength(1);
	}

	int jchar2 = (in.get() & 0xFFFF);

	// The surrogate pair ends with a high-surrogate.
	if (jchar2 < 0xDC00) {
	return CoderResult.malformedForLength(1);
	}

	// Note, the Unicode scalar value n is defined
	// as follows:
	// n = (jchar-0xD800)*0x400+(jchar2-0xDC00)+0x10000
	// Where jchar is a high-surrogate,
	// jchar2 is a low-surrogate.
	int n = (jchar << 10) + jchar2 + 0xFCA02400;

	out.put((byte) (0xF0 + ((n >> 18) & 0x07)));
	out.put((byte) (0x80 + ((n >> 12) & 0x3F)));
	out.put((byte) (0x80 + ((n >> 6) & 0x3F)));
	out.put((byte) (0x80 + (n & 0x3F)));
	outRemaining -= 4;
	pos++;

	} else {

	if (outRemaining < 3) {
	return CoderResult.OVERFLOW;
	}
	out.put((byte) (0xE0 + ((jchar >> 12) & 0x0F)));
	out.put((byte) (0x80 + ((jchar >> 6) & 0x3F)));
	out.put((byte) (0x80 + (jchar & 0x3F)));
	outRemaining -= 3;
	}
	pos++;
	}
	} finally {
	in.position(pos);
	}
	return CoderResult.UNDERFLOW;
	}
	}