blob: f73ddd7229f63d10e76b3e6af29846e38ea407cb [file] [log] [blame]
// Copyright (C) 2017-2018 Baidu, Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in
// the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Baidu, Inc., nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//! Buffering wrappers for I/O traits
use io::prelude::*;
use error;
use io::{self, Initializer, DEFAULT_BUF_SIZE, Error, ErrorKind, SeekFrom};
use memchr;
use core::cmp;
use core::fmt;
/// The `BufReader` struct adds buffering to any reader.
///
/// It can be excessively inefficient to work directly with a [`Read`] instance.
/// For example, every call to [`read`][`TcpStream::read`] on [`TcpStream`]
/// results in a system call. A `BufReader` performs large, infrequent reads on
/// the underlying [`Read`] and maintains an in-memory buffer of the results.
///
/// [`Read`]: ../../std/io/trait.Read.html
/// [`TcpStream::read`]: ../../std/net/struct.TcpStream.html#method.read
/// [`TcpStream`]: ../../std/net/struct.TcpStream.html
///
pub struct BufReader<R> {
inner: R,
buf: Box<[u8]>,
pos: usize,
cap: usize,
}
impl<R: Read> BufReader<R> {
/// Creates a new `BufReader` with a default buffer capacity.
///
pub fn new(inner: R) -> BufReader<R> {
BufReader::with_capacity(DEFAULT_BUF_SIZE, inner)
}
/// Creates a new `BufReader` with the specified buffer capacity.
///
pub fn with_capacity(cap: usize, inner: R) -> BufReader<R> {
unsafe {
let mut buffer = Vec::with_capacity(cap);
buffer.set_len(cap);
inner.initializer().initialize(&mut buffer);
BufReader {
inner: inner,
buf: buffer.into_boxed_slice(),
pos: 0,
cap: 0,
}
}
}
/// Gets a reference to the underlying reader.
///
/// It is inadvisable to directly read from the underlying reader.
///
pub fn get_ref(&self) -> &R { &self.inner }
/// Gets a mutable reference to the underlying reader.
///
/// It is inadvisable to directly read from the underlying reader.
///
pub fn get_mut(&mut self) -> &mut R { &mut self.inner }
/// Returns `true` if there are no bytes in the internal buffer.
///
pub fn is_empty(&self) -> bool {
self.buffer().is_empty()
}
/// Returns a reference to the internally buffered data.
///
/// Unlike `fill_buf`, this will not attempt to fill the buffer if it is empty.
///
pub fn buffer(&self) -> &[u8] {
&self.buf[self.pos..self.cap]
}
/// Unwraps this `BufReader`, returning the underlying reader.
///
/// Note that any leftover data in the internal buffer is lost.
///
pub fn into_inner(self) -> R { self.inner }
}
impl<R: Seek> BufReader<R> {
/// Seeks relative to the current position. If the new position lies within the buffer,
/// the buffer will not be flushed, allowing for more efficient seeks.
/// This method does not return the location of the underlying reader, so the caller
/// must track this information themselves if it is required.
pub fn seek_relative(&mut self, offset: i64) -> io::Result<()> {
let pos = self.pos as u64;
if offset < 0 {
if let Some(new_pos) = pos.checked_sub((-offset) as u64) {
self.pos = new_pos as usize;
return Ok(())
}
} else {
if let Some(new_pos) = pos.checked_add(offset as u64) {
if new_pos <= self.cap as u64 {
self.pos = new_pos as usize;
return Ok(())
}
}
}
self.seek(SeekFrom::Current(offset)).map(|_|())
}
}
impl<R: Read> Read for BufReader<R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
// If we don't have any buffered data and we're doing a massive read
// (larger than our internal buffer), bypass our internal buffer
// entirely.
if self.pos == self.cap && buf.len() >= self.buf.len() {
return self.inner.read(buf);
}
let nread = {
let mut rem = self.fill_buf()?;
rem.read(buf)?
};
self.consume(nread);
Ok(nread)
}
// we can't skip unconditionally because of the large buffer case in read.
unsafe fn initializer(&self) -> Initializer {
self.inner.initializer()
}
}
impl<R: Read> BufRead for BufReader<R> {
fn fill_buf(&mut self) -> io::Result<&[u8]> {
// If we've reached the end of our internal buffer then we need to fetch
// some more data from the underlying reader.
// Branch using `>=` instead of the more correct `==`
// to tell the compiler that the pos..cap slice is always valid.
if self.pos >= self.cap {
debug_assert!(self.pos == self.cap);
self.cap = self.inner.read(&mut self.buf)?;
self.pos = 0;
}
Ok(&self.buf[self.pos..self.cap])
}
fn consume(&mut self, amt: usize) {
self.pos = cmp::min(self.pos + amt, self.cap);
}
}
impl<R> fmt::Debug for BufReader<R> where R: fmt::Debug {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
fmt.debug_struct("BufReader")
.field("reader", &self.inner)
.field("buffer", &format_args!("{}/{}", self.cap - self.pos, self.buf.len()))
.finish()
}
}
impl<R: Seek> Seek for BufReader<R> {
/// Seek to an offset, in bytes, in the underlying reader.
///
/// The position used for seeking with `SeekFrom::Current(_)` is the
/// position the underlying reader would be at if the `BufReader` had no
/// internal buffer.
///
/// Seeking always discards the internal buffer, even if the seek position
/// would otherwise fall within it. This guarantees that calling
/// `.into_inner()` immediately after a seek yields the underlying reader
/// at the same position.
///
/// To seek without discarding the internal buffer, use [`seek_relative`].
///
/// See `std::io::Seek` for more details.
///
/// Note: In the edge case where you're seeking with `SeekFrom::Current(n)`
/// where `n` minus the internal buffer length overflows an `i64`, two
/// seeks will be performed instead of one. If the second seek returns
/// `Err`, the underlying reader will be left at the same position it would
/// have if you called `seek` with `SeekFrom::Current(0)`.
///
/// [`seek_relative`]: #method.seek_relative
///
fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
let result: u64;
if let SeekFrom::Current(n) = pos {
let remainder = (self.cap - self.pos) as i64;
// it should be safe to assume that remainder fits within an i64 as the alternative
// means we managed to allocate 8 exbibytes and that's absurd.
// But it's not out of the realm of possibility for some weird underlying reader to
// support seeking by i64::min_value() so we need to handle underflow when subtracting
// remainder.
if let Some(offset) = n.checked_sub(remainder) {
result = self.inner.seek(SeekFrom::Current(offset))?;
} else {
// seek backwards by our remainder, and then by the offset
self.inner.seek(SeekFrom::Current(-remainder))?;
self.pos = self.cap; // empty the buffer
result = self.inner.seek(SeekFrom::Current(n))?;
}
} else {
// Seeking with Start/End doesn't care about our buffer length.
result = self.inner.seek(pos)?;
}
self.pos = self.cap; // empty the buffer
Ok(result)
}
}
/// Wraps a writer and buffers its output.
///
/// It can be excessively inefficient to work directly with something that
/// implements [`Write`]. For example, every call to
/// [`write`][`Tcpstream::write`] on [`TcpStream`] results in a system call. A
/// `BufWriter` keeps an in-memory buffer of data and writes it to an underlying
/// writer in large, infrequent batches.
///
/// When the `BufWriter` is dropped, the contents of its buffer will be written
/// out. However, any errors that happen in the process of flushing the buffer
/// when the writer is dropped will be ignored. Code that wishes to handle such
/// errors must manually call [`flush`] before the writer is dropped.
///
/// By wrapping the stream with a `BufWriter`, these ten writes are all grouped
/// together by the buffer, and will all be written out in one system call when
/// the `stream` is dropped.
///
/// [`Write`]: ../../std/io/trait.Write.html
/// [`Tcpstream::write`]: ../../std/net/struct.TcpStream.html#method.write
/// [`TcpStream`]: ../../std/net/struct.TcpStream.html
/// [`flush`]: #method.flush
pub struct BufWriter<W: Write> {
inner: Option<W>,
buf: Vec<u8>,
// #30888: If the inner writer panics in a call to write, we don't want to
// write the buffered data a second time in BufWriter's destructor. This
// flag tells the Drop impl if it should skip the flush.
panicked: bool,
}
/// An error returned by `into_inner` which combines an error that
/// happened while writing out the buffer, and the buffered writer object
/// which may be used to recover from the condition.
///
#[derive(Debug)]
pub struct IntoInnerError<W>(W, Error);
impl<W: Write> BufWriter<W> {
/// Creates a new `BufWriter` with a default buffer capacity.
///
pub fn new(inner: W) -> BufWriter<W> {
BufWriter::with_capacity(DEFAULT_BUF_SIZE, inner)
}
/// Creates a new `BufWriter` with the specified buffer capacity.
///
pub fn with_capacity(cap: usize, inner: W) -> BufWriter<W> {
BufWriter {
inner: Some(inner),
buf: Vec::with_capacity(cap),
panicked: false,
}
}
fn flush_buf(&mut self) -> io::Result<()> {
let mut written = 0;
let len = self.buf.len();
let mut ret = Ok(());
while written < len {
self.panicked = true;
let r = self.inner.as_mut().unwrap().write(&self.buf[written..]);
self.panicked = false;
match r {
Ok(0) => {
ret = Err(Error::new(ErrorKind::WriteZero,
"failed to write the buffered data"));
break;
}
Ok(n) => written += n,
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
Err(e) => { ret = Err(e); break }
}
}
if written > 0 {
self.buf.drain(..written);
}
ret
}
/// Gets a reference to the underlying writer.
///
pub fn get_ref(&self) -> &W { self.inner.as_ref().unwrap() }
/// Gets a mutable reference to the underlying writer.
///
/// It is inadvisable to directly write to the underlying writer.
///
pub fn get_mut(&mut self) -> &mut W { self.inner.as_mut().unwrap() }
/// Unwraps this `BufWriter`, returning the underlying writer.
///
/// The buffer is written out before returning the writer.
///
/// # Errors
///
/// An `Err` will be returned if an error occurs while flushing the buffer.
///
pub fn into_inner(mut self) -> Result<W, IntoInnerError<BufWriter<W>>> {
match self.flush_buf() {
Err(e) => Err(IntoInnerError(self, e)),
Ok(()) => Ok(self.inner.take().unwrap())
}
}
}
impl<W: Write> Write for BufWriter<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
if self.buf.len() + buf.len() > self.buf.capacity() {
self.flush_buf()?;
}
if buf.len() >= self.buf.capacity() {
self.panicked = true;
let r = self.inner.as_mut().unwrap().write(buf);
self.panicked = false;
r
} else {
Write::write(&mut self.buf, buf)
}
}
fn flush(&mut self) -> io::Result<()> {
self.flush_buf().and_then(|()| self.get_mut().flush())
}
}
impl<W: Write> fmt::Debug for BufWriter<W> where W: fmt::Debug {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
fmt.debug_struct("BufWriter")
.field("writer", &self.inner.as_ref().unwrap())
.field("buffer", &format_args!("{}/{}", self.buf.len(), self.buf.capacity()))
.finish()
}
}
impl<W: Write + Seek> Seek for BufWriter<W> {
/// Seek to the offset, in bytes, in the underlying writer.
///
/// Seeking always writes out the internal buffer before seeking.
fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
self.flush_buf().and_then(|_| self.get_mut().seek(pos))
}
}
impl<W: Write> Drop for BufWriter<W> {
fn drop(&mut self) {
if self.inner.is_some() && !self.panicked {
// dtors should not panic, so we ignore a failed flush
let _r = self.flush_buf();
}
}
}
impl<W> IntoInnerError<W> {
/// Returns the error which caused the call to `into_inner()` to fail.
///
/// This error was returned when attempting to write the internal buffer.
///
pub fn error(&self) -> &Error { &self.1 }
/// Returns the buffered writer instance which generated the error.
///
/// The returned object can be used for error recovery, such as
/// re-inspecting the buffer.
///
pub fn into_inner(self) -> W { self.0 }
}
impl<W> From<IntoInnerError<W>> for Error {
fn from(iie: IntoInnerError<W>) -> Error { iie.1 }
}
impl<W: Send + fmt::Debug> error::Error for IntoInnerError<W> {
fn description(&self) -> &str {
error::Error::description(self.error())
}
}
impl<W> fmt::Display for IntoInnerError<W> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.error().fmt(f)
}
}
/// Wraps a writer and buffers output to it, flushing whenever a newline
/// (`0x0a`, `'\n'`) is detected.
///
/// The [`BufWriter`][bufwriter] struct wraps a writer and buffers its output.
/// But it only does this batched write when it goes out of scope, or when the
/// internal buffer is full. Sometimes, you'd prefer to write each line as it's
/// completed, rather than the entire buffer at once. Enter `LineWriter`. It
/// does exactly that.
///
/// [bufwriter]: struct.BufWriter.html
///
/// If there's still a partial line in the buffer when the `LineWriter` is
/// dropped, it will flush those contents.
///
pub struct LineWriter<W: Write> {
inner: BufWriter<W>,
need_flush: bool,
}
impl<W: Write> LineWriter<W> {
/// Creates a new `LineWriter`.
///
pub fn new(inner: W) -> LineWriter<W> {
// Lines typically aren't that long, don't use a giant buffer
LineWriter::with_capacity(1024, inner)
}
/// Creates a new `LineWriter` with a specified capacity for the internal
/// buffer.
///
pub fn with_capacity(cap: usize, inner: W) -> LineWriter<W> {
LineWriter {
inner: BufWriter::with_capacity(cap, inner),
need_flush: false,
}
}
/// Gets a reference to the underlying writer.
///
pub fn get_ref(&self) -> &W { self.inner.get_ref() }
/// Gets a mutable reference to the underlying writer.
///
/// Caution must be taken when calling methods on the mutable reference
/// returned as extra writes could corrupt the output stream.
///
pub fn get_mut(&mut self) -> &mut W { self.inner.get_mut() }
/// Unwraps this `LineWriter`, returning the underlying writer.
///
/// The internal buffer is written out before returning the writer.
///
/// # Errors
///
/// An `Err` will be returned if an error occurs while flushing the buffer.
///
pub fn into_inner(self) -> Result<W, IntoInnerError<LineWriter<W>>> {
self.inner.into_inner().map_err(|IntoInnerError(buf, e)| {
IntoInnerError(LineWriter {
inner: buf,
need_flush: false,
}, e)
})
}
}
impl<W: Write> Write for LineWriter<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
if self.need_flush {
self.flush()?;
}
// Find the last newline character in the buffer provided. If found then
// we're going to write all the data up to that point and then flush,
// otherewise we just write the whole block to the underlying writer.
let i = match memchr::memrchr(b'\n', buf) {
Some(i) => i,
None => return self.inner.write(buf),
};
// Ok, we're going to write a partial amount of the data given first
// followed by flushing the newline. After we've successfully written
// some data then we *must* report that we wrote that data, so future
// errors are ignored. We set our internal `need_flush` flag, though, in
// case flushing fails and we need to try it first next time.
let n = self.inner.write(&buf[..i + 1])?;
self.need_flush = true;
if self.flush().is_err() || n != i + 1 {
return Ok(n)
}
// At this point we successfully wrote `i + 1` bytes and flushed it out,
// meaning that the entire line is now flushed out on the screen. While
// we can attempt to finish writing the rest of the data provided.
// Remember though that we ignore errors here as we've successfully
// written data, so we need to report that.
match self.inner.write(&buf[i + 1..]) {
Ok(i) => Ok(n + i),
Err(_) => Ok(n),
}
}
fn flush(&mut self) -> io::Result<()> {
self.inner.flush()?;
self.need_flush = false;
Ok(())
}
}
impl<W: Write> fmt::Debug for LineWriter<W> where W: fmt::Debug {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
fmt.debug_struct("LineWriter")
.field("writer", &self.inner.inner)
.field("buffer",
&format_args!("{}/{}", self.inner.buf.len(), self.inner.buf.capacity()))
.finish()
}
}