| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.commons.io.input; |
| |
| import static org.junit.jupiter.api.Assertions.assertArrayEquals; |
| import static org.junit.jupiter.api.Assertions.assertEquals; |
| import static org.junit.jupiter.api.Assertions.assertFalse; |
| import static org.junit.jupiter.api.Assertions.assertTrue; |
| import static org.junit.jupiter.api.Assertions.fail; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.nio.charset.Charset; |
| import java.util.Random; |
| import java.util.Set; |
| |
| import org.apache.commons.io.Charsets; |
| import org.junit.jupiter.api.Disabled; |
| import org.junit.jupiter.api.Test; |
| |
| public class CharSequenceInputStreamTest { |
| |
| private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
| private static final String LARGE_TEST_STRING; |
| |
| private static final String TEST_STRING = "\u00e0 peine arriv\u00e9s nous entr\u00e2mes dans sa chambre"; |
| |
| static { |
| final StringBuilder buffer = new StringBuilder(); |
| for (int i = 0; i < 100; i++) { |
| buffer.append(TEST_STRING); |
| } |
| LARGE_TEST_STRING = buffer.toString(); |
| } |
| |
| private final Random random = new Random(); |
| |
| private Set<String> getRequiredCharsetNames() { |
| return Charsets.requiredCharsets().keySet(); |
| } |
| |
| private void testBufferedRead(final String testString, final String charsetName) throws IOException { |
| final byte[] expected = testString.getBytes(charsetName); |
| try (InputStream in = new CharSequenceInputStream(testString, charsetName, 512)) { |
| final byte[] buffer = new byte[128]; |
| int offset = 0; while (true) { |
| int bufferOffset = random.nextInt(64); |
| final int bufferLength = random.nextInt(64); |
| int read = in.read(buffer, bufferOffset, bufferLength); |
| if (read == -1) { |
| assertEquals(expected.length, offset, "EOF: offset should equal length for charset " + charsetName); |
| break; |
| } |
| assertTrue(read <= bufferLength, "Read " + read + " <= " + bufferLength); |
| while (read > 0) { |
| assertTrue(offset < expected.length, |
| "offset for " + charsetName + " " + offset + " < " + expected.length); |
| assertEquals(expected[offset], buffer[bufferOffset], "bytes should agree for " + charsetName); |
| offset++; |
| bufferOffset++; |
| read--; |
| } |
| } |
| } |
| } |
| |
| // Unfortunately checking canEncode does not seem to work for all charsets: |
| // testBufferedRead_AvailableCharset(org.apache.commons.io.input.CharSequenceInputStreamTest) Time elapsed: 0.682 sec <<< ERROR! |
| // java.lang.UnsupportedOperationException: null |
| // at java.nio.CharBuffer.array(CharBuffer.java:940) |
| // at sun.nio.cs.ext.COMPOUND_TEXT_Encoder.encodeLoop(COMPOUND_TEXT_Encoder.java:75) |
| // at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:544) |
| // at org.apache.commons.io.input.CharSequenceInputStream.fillBuffer(CharSequenceInputStream.java:111) |
| @Test |
| public void testBufferedRead_AvailableCharset() throws IOException { |
| for (final String csName : Charset.availableCharsets().keySet()) { |
| // prevent java.lang.UnsupportedOperationException at sun.nio.cs.ext.ISO2022_CN.newEncoder. |
| if (isAvailabilityTestableForCharset(csName)) { |
| testBufferedRead(TEST_STRING, csName); |
| } |
| } |
| } |
| |
| @Test |
| public void testBufferedRead_RequiredCharset() throws IOException { |
| for (final String csName : getRequiredCharsetNames()) { |
| testBufferedRead(TEST_STRING, csName); |
| } |
| } |
| |
| @Test |
| public void testBufferedRead_UTF8() throws IOException { |
| testBufferedRead(TEST_STRING, "UTF-8"); |
| } |
| |
| private void testCharsetMismatchInfiniteLoop(final String csName) throws IOException { |
| // Input is UTF-8 bytes: 0xE0 0xB2 0xA0 |
| final char[] inputChars = new char[] { (char) 0xE0, (char) 0xB2, (char) 0xA0 }; |
| final Charset charset = Charset.forName(csName); // infinite loop for US-ASCII, UTF-8 OK |
| try (InputStream stream = new CharSequenceInputStream(new String(inputChars), charset, 512)) { |
| while (stream.read() != -1) { |
| } |
| } |
| } |
| |
| @Test |
| public void testCharsetMismatchInfiniteLoop_RequiredCharsets() throws IOException { |
| for (final String csName : getRequiredCharsetNames()) { |
| testCharsetMismatchInfiniteLoop(csName); |
| } |
| } |
| |
| // Test is broken if readFirst > 0 |
| // This is because the initial read fills the buffer from the CharSequence |
| // so data1 gets the first buffer full; data2 will get the next buffer full |
| private void testIO_356(final int bufferSize, final int dataSize, final int readFirst, final String csName) throws Exception { |
| final CharSequenceInputStream is = new CharSequenceInputStream(ALPHABET, csName, bufferSize); |
| |
| for (int i = 0; i < readFirst; i++) { |
| final int ch = is.read(); |
| assertFalse(ch == -1); |
| } |
| |
| is.mark(dataSize); |
| |
| final byte[] data1 = new byte[dataSize]; |
| final int readCount1 = is.read(data1); |
| assertEquals(dataSize, readCount1); |
| |
| is.reset(); // should allow data to be re-read |
| |
| final byte[] data2 = new byte[dataSize]; |
| final int readCount2 = is.read(data2); |
| assertEquals(dataSize, readCount2); |
| |
| is.close(); |
| |
| // data buffers should be identical |
| assertArrayEquals(data1, data2, "bufferSize=" + bufferSize + " dataSize=" + dataSize); |
| } |
| |
| @Test |
| public void testIO_356_B10_D10_S0_UTF16() throws Exception { |
| testIO_356(10, 10, 0, "UTF-16"); |
| } |
| |
| @Test |
| public void testIO_356_B10_D10_S0_UTF8() throws Exception { |
| testIO_356(10, 10, 0, "UTF-8"); |
| } |
| |
| @Test |
| public void testIO_356_B10_D10_S1_UTF8() throws Exception { |
| testIO_356(10, 10, 1, "UTF-8"); |
| } |
| |
| @Test |
| public void testIO_356_B10_D10_S2_UTF8() throws Exception { |
| testIO_356(10, 10, 2, "UTF-8"); |
| } |
| |
| @Test |
| public void testIO_356_B10_D13_S0_UTF8() throws Exception { |
| testIO_356(10, 13, 0, "UTF-8"); |
| } |
| |
| @Test |
| public void testIO_356_B10_D13_S1_UTF8() throws Exception { |
| testIO_356(10, 13, 1, "UTF-8"); |
| } |
| |
| @Test |
| public void testIO_356_B10_D20_S0_UTF8() throws Exception { |
| testIO_356(10, 20, 0, "UTF-8"); |
| } |
| |
| private void testIO_356_Loop(final String csName, final int maxBytesPerChar) throws Exception { |
| for (int bufferSize = maxBytesPerChar; bufferSize <= 10; bufferSize++) { |
| for (int dataSize = 1; dataSize <= 20; dataSize++) { |
| testIO_356(bufferSize, dataSize, 0, csName); |
| } |
| } |
| } |
| |
| @Test |
| public void testIO_356_Loop_UTF16() throws Exception { |
| testIO_356_Loop("UTF-16", 4); |
| } |
| |
| @Test |
| public void testIO_356_Loop_UTF8() throws Exception { |
| testIO_356_Loop("UTF-8", 4); |
| } |
| |
| @Test |
| public void testLargeBufferedRead_RequiredCharsets() throws IOException { |
| for (final String csName : getRequiredCharsetNames()) { |
| testBufferedRead(LARGE_TEST_STRING, csName); |
| } |
| } |
| |
| @Test |
| public void testLargeBufferedRead_UTF8() throws IOException { |
| testBufferedRead(LARGE_TEST_STRING, "UTF-8"); |
| } |
| |
| @Test |
| public void testLargeSingleByteRead_RequiredCharsets() throws IOException { |
| for (final String csName : getRequiredCharsetNames()) { |
| testSingleByteRead(LARGE_TEST_STRING, csName); |
| } |
| } |
| |
| @Test |
| public void testLargeSingleByteRead_UTF8() throws IOException { |
| testSingleByteRead(LARGE_TEST_STRING, "UTF-8"); |
| } |
| |
| // This test is broken for charsets that don't create a single byte for each char |
| private void testMarkReset(final String csName) throws Exception { |
| try (InputStream r = new CharSequenceInputStream("test", csName)) { |
| assertEquals(2, r.skip(2)); |
| r.mark(0); |
| assertEquals('s', r.read(), csName); |
| assertEquals('t', r.read(), csName); |
| assertEquals(-1, r.read(), csName); |
| r.reset(); |
| assertEquals('s', r.read(), csName); |
| assertEquals('t', r.read(), csName); |
| assertEquals(-1, r.read(), csName); |
| r.reset(); |
| r.reset(); |
| } |
| } |
| |
| @Test |
| @Disabled // Test broken for charsets that create multiple bytes for a single char |
| public void testMarkReset_RequiredCharsets() throws Exception { |
| for (final String csName : getRequiredCharsetNames()) { |
| testMarkReset(csName); |
| } |
| } |
| |
| @Test |
| public void testMarkReset_USASCII() throws Exception { |
| testMarkReset("US-ASCII"); |
| } |
| |
| @Test |
| public void testMarkReset_UTF8() throws Exception { |
| testMarkReset("UTF-8"); |
| } |
| |
| @Test |
| public void testMarkSupported() throws Exception { |
| try (InputStream r = new CharSequenceInputStream("test", "UTF-8")) { |
| assertTrue(r.markSupported()); |
| } |
| } |
| |
| private void testReadZero(final String csName) throws Exception { |
| try (InputStream r = new CharSequenceInputStream("test", csName)) { |
| final byte[] bytes = new byte[30]; |
| assertEquals(0, r.read(bytes, 0, 0)); |
| } |
| } |
| |
| @Test |
| public void testReadZero_EmptyString() throws Exception { |
| try (InputStream r = new CharSequenceInputStream("", "UTF-8")) { |
| final byte[] bytes = new byte[30]; |
| assertEquals(0, r.read(bytes, 0, 0)); |
| } |
| } |
| |
| @Test |
| public void testReadZero_RequiredCharsets() throws Exception { |
| for (final String csName : getRequiredCharsetNames()) { |
| testReadZero(csName); |
| } |
| } |
| |
| private void testSingleByteRead(final String testString, final String charsetName) throws IOException { |
| final byte[] bytes = testString.getBytes(charsetName); |
| try (InputStream in = new CharSequenceInputStream(testString, charsetName, 512)) { |
| for (final byte b : bytes) { |
| final int read = in.read(); |
| assertTrue(read >= 0, "read " + read + " >=0 "); |
| assertTrue(read <= 255, "read " + read + " <= 255"); |
| assertEquals(b, (byte) read, "Should agree with input"); |
| } |
| assertEquals(-1, in.read()); |
| } |
| } |
| |
| @Test |
| public void testSingleByteRead_RequiredCharsets() throws IOException { |
| for (final String csName : getRequiredCharsetNames()) { |
| testSingleByteRead(TEST_STRING, csName); |
| } |
| } |
| |
| @Test |
| public void testSingleByteRead_UTF16() throws IOException { |
| testSingleByteRead(TEST_STRING, "UTF-16"); |
| } |
| |
| @Test |
| public void testSingleByteRead_UTF8() throws IOException { |
| testSingleByteRead(TEST_STRING, "UTF-8"); |
| } |
| |
| // This is broken for charsets that don't map each char to a byte |
| private void testSkip(final String csName) throws Exception { |
| try (InputStream r = new CharSequenceInputStream("test", csName)) { |
| assertEquals(1, r.skip(1)); |
| assertEquals(2, r.skip(2)); |
| assertEquals('t', r.read(), csName); |
| r.skip(100); |
| assertEquals(-1, r.read(), csName); |
| } |
| } |
| |
| @Test |
| @Disabled // test is broken for charsets that generate multiple bytes per char. |
| public void testSkip_RequiredCharsets() throws Exception { |
| for (final String csName : getRequiredCharsetNames()) { |
| testSkip(csName); |
| } |
| } |
| |
| @Test |
| public void testSkip_USASCII() throws Exception { |
| testSkip("US-ASCII"); |
| } |
| |
| @Test |
| public void testSkip_UTF8() throws Exception { |
| testSkip("UTF-8"); |
| } |
| |
| private int checkAvail(final InputStream is, final int min) throws Exception { |
| final int available = is.available(); |
| assertTrue(available >= min, "avail should be >= " + min + ", but was " + available); |
| return available; |
| } |
| |
| private void testAvailableSkip(final String csName) throws Exception { |
| final String input = "test"; |
| try (InputStream r = new CharSequenceInputStream(input, csName)) { |
| int available = checkAvail(r, input.length()); |
| assertEquals(available - 1, r.skip(available - 1)); // skip all but one |
| available = checkAvail(r, 1); |
| assertEquals(1, r.skip(1)); |
| available = checkAvail(r, 0); |
| } |
| } |
| |
| private void testAvailableRead(final String csName) throws Exception { |
| final String input = "test"; |
| try (InputStream r = new CharSequenceInputStream(input, csName)) { |
| int available = checkAvail(r, input.length()); |
| assertEquals(available - 1, r.skip(available - 1)); // skip all but one |
| available = checkAvail(r, 1); |
| final byte[] buff = new byte[available]; |
| assertEquals(available, r.read(buff, 0, available)); |
| } |
| } |
| |
| @Test |
| public void testAvailable() throws Exception { |
| for (final String csName : Charset.availableCharsets().keySet()) { |
| // prevent java.lang.UnsupportedOperationException at sun.nio.cs.ext.ISO2022_CN.newEncoder. |
| // also try and avoid the following Effor on Continuum |
| // java.lang.UnsupportedOperationException: null |
| // at java.nio.CharBuffer.array(CharBuffer.java:940) |
| // at sun.nio.cs.ext.COMPOUND_TEXT_Encoder.encodeLoop(COMPOUND_TEXT_Encoder.java:75) |
| // at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:544) |
| // at org.apache.commons.io.input.CharSequenceInputStream.fillBuffer(CharSequenceInputStream.java:120) |
| // at org.apache.commons.io.input.CharSequenceInputStream.read(CharSequenceInputStream.java:151) |
| // at org.apache.commons.io.input.CharSequenceInputStreamTest.testAvailableRead(CharSequenceInputStreamTest.java:412) |
| // at org.apache.commons.io.input.CharSequenceInputStreamTest.testAvailable(CharSequenceInputStreamTest.java:424) |
| |
| try { |
| if (isAvailabilityTestableForCharset(csName)) { |
| testAvailableSkip(csName); |
| testAvailableRead(csName); |
| } |
| } catch (final UnsupportedOperationException e){ |
| fail("Operation not supported for " + csName); |
| } |
| } |
| } |
| |
| private boolean isAvailabilityTestableForCharset(final String csName) { |
| return Charset.forName(csName).canEncode() |
| && !"COMPOUND_TEXT".equalsIgnoreCase(csName) && !"x-COMPOUND_TEXT".equalsIgnoreCase(csName) |
| && !isOddBallLegacyCharsetThatDoesNotSupportFrenchCharacters(csName); |
| } |
| |
| private boolean isOddBallLegacyCharsetThatDoesNotSupportFrenchCharacters(final String csName) { |
| return "x-IBM1388".equalsIgnoreCase(csName) || |
| "ISO-2022-CN".equalsIgnoreCase(csName) || |
| "ISO-2022-JP".equalsIgnoreCase(csName) || |
| "Shift_JIS".equalsIgnoreCase(csName); |
| } |
| } |