// Source: src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java - commons-io - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.commons.io.input;

 import static org.apache.commons.io.IOUtils.EOF;

 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 import java.util.Objects;

 import org.apache.commons.io.Charsets;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.build.AbstractStreamBuilder;
 import org.apache.commons.io.function.Uncheck;

 /**
  * Implements an {@link InputStream} to read from String, StringBuffer, StringBuilder or CharBuffer.
  * <p>
  * The characters are encoded to bytes on demand through a {@link CharsetEncoder}; malformed input and unmappable
  * characters are replaced rather than reported as errors.
  * </p>
  * <p>
  * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
  * </p>
  * <p>
  * Only {@link #mark(int)} and {@link #reset()} are synchronized; the read methods are not.
  * </p>
  *
  * @since 2.2
  */
 public class CharSequenceInputStream extends InputStream {

     /**
      * Builds a new {@link CharSequenceInputStream} instance.
      * <p>
      * For example:
      * </p>
      *
      * <pre>{@code
      * CharSequenceInputStream s = CharSequenceInputStream.builder()
      *   .setBufferSize(8192)
      *   .setCharSequence("String")
      *   .setCharset(Charset.defaultCharset())
      *   .get();}
      * </pre>
      *
      * @since 2.13.0
      */
     public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {

         /**
          * Constructs a new instance.
          * <p>
          * This builder uses the following aspects: the CharSequence, the buffer size, and the Charset.
          * </p>
          *
          * @return a new instance.
          * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
          */
         @Override
         public CharSequenceInputStream get() {
             return Uncheck.get(() -> new CharSequenceInputStream(getCharSequence(), getCharset(), getBufferSize()));
         }

     }

     /** Sentinel stored in {@link #cBufMark} and {@link #bBufMark} while no mark is set. */
     private static final int NO_MARK = -1;

     /**
      * Constructs a new {@link Builder}.
      *
      * @return a new {@link Builder}.
      * @since 2.12.0
      */
     public static Builder builder() {
         return new Builder();
     }

     /** Encoder used to turn the characters of {@link #cBuf} into the bytes of {@link #bBuf}. */
     private final CharsetEncoder charsetEncoder;

     /** The characters still to be encoded; its position is the next character to encode. */
     private final CharBuffer cBuf;

     /** The encoded bytes not yet returned to the caller; kept in "read" mode between calls. */
     private final ByteBuffer bBuf;

     private int cBufMark; // position in cBuf when mark() was called, or NO_MARK
     private int bBufMark; // position in bBuf when mark() was called, or NO_MARK

     /**
      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
      *
      * @param cs the input character sequence.
      * @param charset the character set to use, null maps to the default Charset.
      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
      */
     @Deprecated
     public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
     }

     /**
      * Constructs a new instance.
      *
      * @param cs the input character sequence.
      * @param charset the character set to use, null maps to the default Charset.
      * @param bufferSize the buffer size to use.
      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
      */
     @Deprecated
     public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
         // @formatter:off
         this.charsetEncoder = Charsets.toCharset(charset).newEncoder()
             .onMalformedInput(CodingErrorAction.REPLACE)
             .onUnmappableCharacter(CodingErrorAction.REPLACE);
         // @formatter:on
         // Ensure that buffer is long enough to hold a complete character
         this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize));
         // Flip the freshly allocated buffer so that it starts out empty in "read" mode (position == limit == 0);
         // the first read then triggers fillBuffer().
         this.bBuf.flip();
         this.cBuf = CharBuffer.wrap(cs);
         this.cBufMark = NO_MARK;
         this.bBufMark = NO_MARK;
     }

     /**
      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
      *
      * @param cs the input character sequence.
      * @param charset the character set name to use, null maps to the default Charset.
      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
      */
     @Deprecated
     public CharSequenceInputStream(final CharSequence cs, final String charset) {
         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
     }

     /**
      * Constructs a new instance.
      *
      * @param cs the input character sequence.
      * @param charset the character set name to use, null maps to the default Charset.
      * @param bufferSize the buffer size to use.
      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
      */
     @Deprecated
     public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
         this(cs, Charsets.toCharset(charset), bufferSize);
     }

     /**
      * Return an estimate of the number of bytes remaining in the byte stream.
      * <p>
      * The estimate is the number of already encoded bytes plus the number of characters not yet encoded, capped at
      * {@link Integer#MAX_VALUE}.
      * </p>
      *
      * @return the count of bytes that can be read without blocking (or returning EOF).
      * @throws IOException if an error occurs (probably not possible).
      */
     @Override
     public int available() throws IOException {
         // The cached entries are in bBuf; since encoding always creates at least one byte
         // per character, we can add the two to get a better estimate (e.g. if bBuf is empty)
         // Note that the previous implementation (2.4) could return zero even though there were
         // encoded bytes still available.
         // The sum is computed as a long and clamped so that two large buffers cannot overflow into a negative int.
         final long estimate = (long) this.bBuf.remaining() + this.cBuf.remaining();
         return (int) Math.min(estimate, Integer.MAX_VALUE);
     }

     /**
      * Does nothing; this stream holds no resources that need releasing.
      *
      * @throws IOException never thrown by this implementation.
      */
     @Override
     public void close() throws IOException {
         // noop
     }

     /**
      * Fills the byte output buffer from the input char buffer.
      * <p>
      * Any unread bytes are compacted to the front of the byte buffer, as many characters as fit are encoded behind
      * them, and the byte buffer is flipped back into "read" mode.
      * </p>
      *
      * @throws CharacterCodingException
      *             an error encoding data.
      */
     private void fillBuffer() throws CharacterCodingException {
         this.bBuf.compact();
         // NOTE(review): endOfInput is always true and CharsetEncoder.flush() is never called; an encoder that
         // emits trailing bytes on flush would have them dropped - confirm whether that matters for supported charsets.
         final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
         if (result.isError()) {
             result.throwException();
         }
         this.bBuf.flip();
     }

     /**
      * Gets the CharsetEncoder.
      *
      * @return the CharsetEncoder.
      */
     CharsetEncoder getCharsetEncoder() {
         return charsetEncoder;
     }

     /**
      * {@inheritDoc}
      * <p>
      * Records the current positions of the character and byte buffers so that {@link #reset()} can return to them.
      * </p>
      *
      * @param readlimit max read limit (ignored).
      */
     @Override
     public synchronized void mark(final int readlimit) {
         this.cBufMark = this.cBuf.position();
         this.bBufMark = this.bBuf.position();
         this.cBuf.mark();
         this.bBuf.mark();
         // It would be nice to be able to use mark & reset on the cBuf and bBuf;
         // however the bBuf is re-used so that won't work. reset() relies on cBufMark/bBufMark instead.
     }

     /**
      * Tests whether this stream supports {@link #mark(int)} and {@link #reset()}.
      *
      * @return always {@code true}.
      */
     @Override
     public boolean markSupported() {
         return true;
     }

     /**
      * Reads the next encoded byte.
      *
      * @return the byte as a value in the range 0 to 255, or {@code EOF} when both the byte buffer and the character
      *         buffer are exhausted.
      * @throws IOException if an encoding error occurs.
      */
     @Override
     public int read() throws IOException {
         for (;;) {
             if (this.bBuf.hasRemaining()) {
                 // Mask to return the byte as an unsigned value.
                 return this.bBuf.get() & 0xFF;
             }
             fillBuffer();
             if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
                 return EOF;
             }
         }
     }

     /**
      * Reads encoded bytes into the whole of the given array.
      *
      * @param b the destination array.
      * @return the number of bytes read, 0 for an empty array, or {@code EOF} at the end of the stream.
      * @throws IOException if an encoding error occurs.
      * @throws NullPointerException if {@code b} is null.
      */
     @Override
     public int read(final byte[] b) throws IOException {
         return read(b, 0, b.length);
     }

     /**
      * Reads up to {@code len} encoded bytes into {@code array} starting at {@code off}.
      *
      * @param array the destination array.
      * @param off the index in {@code array} at which the first byte is stored.
      * @param len the maximum number of bytes to read.
      * @return the number of bytes read, 0 if {@code len} is 0, or {@code EOF} at the end of the stream.
      * @throws IOException if an encoding error occurs.
      * @throws NullPointerException if {@code array} is null.
      * @throws IndexOutOfBoundsException if {@code off} is negative, {@code len} is negative, or {@code len} is
      *         greater than {@code array.length - off}.
      */
     @Override
     public int read(final byte[] array, int off, int len) throws IOException {
         Objects.requireNonNull(array, "array");
         // off is checked first so that "array.length - off" cannot overflow; comparing len against that
         // difference (instead of testing off + len) avoids an int overflow slipping past the check.
         if (off < 0 || len < 0 || len > array.length - off) {
             throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
         }
         if (len == 0) {
             return 0; // must return 0 for zero length read
         }
         if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
             return EOF;
         }
         int bytesRead = 0;
         while (len > 0) {
             if (this.bBuf.hasRemaining()) {
                 final int chunk = Math.min(this.bBuf.remaining(), len);
                 this.bBuf.get(array, off, chunk);
                 off += chunk;
                 len -= chunk;
                 bytesRead += chunk;
             } else {
                 // Only refill once the byte buffer is empty; reset() depends on this to replay the same chunks.
                 fillBuffer();
                 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
                     break;
                 }
             }
         }
         return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
     }

     /**
      * Repositions this stream to the position recorded by the last call to {@link #mark(int)}.
      * <p>
      * Does nothing when no mark is set. The mark is cleared by a successful reset, so a second reset without a new
      * {@link #mark(int)} also does nothing.
      * </p>
      *
      * @throws IOException if re-encoding the data fails.
      * @throws IllegalStateException if re-encoding does not arrive exactly at the marked character position.
      */
     @Override
     public synchronized void reset() throws IOException {
         //
         // This is not the most efficient implementation, as it re-encodes from the beginning.
         //
         // Since the bBuf is re-used, in general it's necessary to re-encode the data.
         //
         // It should be possible to apply some optimizations however:
         // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
         // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
         // valid otherwise.
         // + Try saving the state of the cBuf before each fillBuffer; it might be possible to
         // restart from there.
         //
         if (this.cBufMark != NO_MARK) {
             // if cBuf is at 0, we have not started reading anything, so skip re-encoding
             if (this.cBuf.position() != 0) {
                 this.charsetEncoder.reset();
                 this.cBuf.rewind();
                 this.bBuf.rewind();
                 this.bBuf.limit(0); // rewind does not clear the buffer
                 while (this.cBuf.position() < this.cBufMark) {
                     this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
                     this.bBuf.limit(0);
                     fillBuffer();
                 }
             }
             if (this.cBuf.position() != this.cBufMark) {
                 throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
                         "expected=" + this.cBufMark);
             }
             this.bBuf.position(this.bBufMark);
             this.cBufMark = NO_MARK;
             this.bBufMark = NO_MARK;
         }
     }

     /**
      * Skips over and discards up to {@code n} bytes of encoded data.
      *
      * @param n the number of bytes to skip; values less than or equal to zero skip nothing.
      * @return the number of bytes actually skipped.
      * @throws IOException if an encoding error occurs.
      */
     @Override
     public long skip(long n) throws IOException {
         //
         // This could be made more efficient by using position to skip within the current buffer.
         //
         long skipped = 0;
         while (n > 0 && available() > 0) {
             // available() is only an estimate: do not count a byte that was never delivered.
             if (this.read() == EOF) {
                 break;
             }
             n--;
             skipped++;
         }
         return skipped;
     }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.commons.io.input;

	import static org.apache.commons.io.IOUtils.EOF;

	import java.io.IOException;
	import java.io.InputStream;
	import java.nio.ByteBuffer;
	import java.nio.CharBuffer;
	import java.nio.charset.CharacterCodingException;
	import java.nio.charset.Charset;
	import java.nio.charset.CharsetEncoder;
	import java.nio.charset.CoderResult;
	import java.nio.charset.CodingErrorAction;
	import java.util.Objects;

	import org.apache.commons.io.Charsets;
	import org.apache.commons.io.IOUtils;
	import org.apache.commons.io.build.AbstractStreamBuilder;
	import org.apache.commons.io.function.Uncheck;

	/**
	* Implements an {@link InputStream} to read from String, StringBuffer, StringBuilder or CharBuffer.
	* <p>
	* <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
	* </p>
	*
	* @since 2.2
	*/
	public class CharSequenceInputStream extends InputStream {

	/**
	* Builds a new {@link CharSequenceInputStream} instance.
	* <p>
	* For example:
	* </p>
	*
	* <pre>{@code
	* CharSequenceInputStream s = CharSequenceInputStream.builder()
	* .setBufferSize(8192)
	* .setCharSequence("String")
	* .setCharsetEncoder(Charset.defaultCharset())
	* .get();}
	* </pre>
	*
	* @since 2.13.0
	*/
	public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {

	/**
	* Constructs a new instance.
	* <p>
	* This builder use the aspects the CharSequence, buffer size, and Charset.
	* </p>
	*
	* @return a new instance.
	* @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
	*/
	@Override
	public CharSequenceInputStream get() {
	return Uncheck.get(() -> new CharSequenceInputStream(getCharSequence(), getCharset(), getBufferSize()));
	}

	}

	private static final int NO_MARK = -1;

	/**
	* Constructs a new {@link Builder}.
	*
	* @return a new {@link Builder}.
	* @since 2.12.0
	*/
	public static Builder builder() {
	return new Builder();
	}

	private final CharsetEncoder charsetEncoder;
	private final CharBuffer cBuf;
	private final ByteBuffer bBuf;

	private int cBufMark; // position in cBuf
	private int bBufMark; // position in bBuf

	/**
	* Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
	*
	* @param cs the input character sequence.
	* @param charset the character set name to use.
	* @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
	* @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
	*/
	@Deprecated
	public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
	this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
	}

	/**
	* Constructs a new instance.
	*
	* @param cs the input character sequence.
	* @param charset the character set name to use, null maps to the default Charset.
	* @param bufferSize the buffer size to use.
	* @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
	* @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
	*/
	@Deprecated
	public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
	// @formatter:off
	this.charsetEncoder = Charsets.toCharset(charset).newEncoder()
	.onMalformedInput(CodingErrorAction.REPLACE)
	.onUnmappableCharacter(CodingErrorAction.REPLACE);
	// @formatter:on
	// Ensure that buffer is long enough to hold a complete character
	this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize));
	this.bBuf.flip();
	this.cBuf = CharBuffer.wrap(cs);
	this.cBufMark = NO_MARK;
	this.bBufMark = NO_MARK;
	}

	/**
	* Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
	*
	* @param cs the input character sequence.
	* @param charset the character set name to use.
	* @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
	* @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
	*/
	@Deprecated
	public CharSequenceInputStream(final CharSequence cs, final String charset) {
	this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
	}

	/**
	* Constructs a new instance.
	*
	* @param cs the input character sequence.
	* @param charset the character set name to use, null maps to the default Charset.
	* @param bufferSize the buffer size to use.
	* @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
	* @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
	*/
	@Deprecated
	public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
	this(cs, Charsets.toCharset(charset), bufferSize);
	}

	/**
	* Return an estimate of the number of bytes remaining in the byte stream.
	* @return the count of bytes that can be read without blocking (or returning EOF).
	*
	* @throws IOException if an error occurs (probably not possible).
	*/
	@Override
	public int available() throws IOException {
	// The cached entries are in bBuf; since encoding always creates at least one byte
	// per character, we can add the two to get a better estimate (e.g. if bBuf is empty)
	// Note that the previous implementation (2.4) could return zero even though there were
	// encoded bytes still available.
	return this.bBuf.remaining() + this.cBuf.remaining();
	}

	@Override
	public void close() throws IOException {
	// noop
	}

	/**
	* Fills the byte output buffer from the input char buffer.
	*
	* @throws CharacterCodingException
	* an error encoding data.
	*/
	private void fillBuffer() throws CharacterCodingException {
	this.bBuf.compact();
	final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
	if (result.isError()) {
	result.throwException();
	}
	this.bBuf.flip();
	}

	/**
	* Gets the CharsetEncoder.
	*
	* @return the CharsetEncoder.
	*/
	CharsetEncoder getCharsetEncoder() {
	return charsetEncoder;
	}

	/**
	* {@inheritDoc}
	* @param readlimit max read limit (ignored).
	*/
	@Override
	public synchronized void mark(final int readlimit) {
	this.cBufMark = this.cBuf.position();
	this.bBufMark = this.bBuf.position();
	this.cBuf.mark();
	this.bBuf.mark();
	// It would be nice to be able to use mark & reset on the cBuf and bBuf;
	// however the bBuf is re-used so that won't work
	}

	@Override
	public boolean markSupported() {
	return true;
	}

	@Override
	public int read() throws IOException {
	for (;;) {
	if (this.bBuf.hasRemaining()) {
	return this.bBuf.get() & 0xFF;
	}
	fillBuffer();
	if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
	return EOF;
	}
	}
	}

	@Override
	public int read(final byte[] b) throws IOException {
	return read(b, 0, b.length);
	}

	@Override
	public int read(final byte[] array, int off, int len) throws IOException {
	Objects.requireNonNull(array, "array");
	if (len < 0 \|\| off + len > array.length) {
	throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
	}
	if (len == 0) {
	return 0; // must return 0 for zero length read
	}
	if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
	return EOF;
	}
	int bytesRead = 0;
	while (len > 0) {
	if (this.bBuf.hasRemaining()) {
	final int chunk = Math.min(this.bBuf.remaining(), len);
	this.bBuf.get(array, off, chunk);
	off += chunk;
	len -= chunk;
	bytesRead += chunk;
	} else {
	fillBuffer();
	if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
	break;
	}
	}
	}
	return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
	}

	@Override
	public synchronized void reset() throws IOException {
	//
	// This is not the most efficient implementation, as it re-encodes from the beginning.
	//
	// Since the bBuf is re-used, in general it's necessary to re-encode the data.
	//
	// It should be possible to apply some optimizations however:
	// + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
	// the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
	// valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to
	// restart from there.
	//
	if (this.cBufMark != NO_MARK) {
	// if cBuf is at 0, we have not started reading anything, so skip re-encoding
	if (this.cBuf.position() != 0) {
	this.charsetEncoder.reset();
	this.cBuf.rewind();
	this.bBuf.rewind();
	this.bBuf.limit(0); // rewind does not clear the buffer
	while (this.cBuf.position() < this.cBufMark) {
	this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
	this.bBuf.limit(0);
	fillBuffer();
	}
	}
	if (this.cBuf.position() != this.cBufMark) {
	throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
	"expected=" + this.cBufMark);
	}
	this.bBuf.position(this.bBufMark);
	this.cBufMark = NO_MARK;
	this.bBufMark = NO_MARK;
	}
	}

	@Override
	public long skip(long n) throws IOException {
	//
	// This could be made more efficient by using position to skip within the current buffer.
	//
	long skipped = 0;
	while (n > 0 && available() > 0) {
	this.read();
	n--;
	skipped++;
	}
	return skipped;
	}

	}