// Copyright 2022 The Blaze Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/// target capacity of each buf in the arena
const BUF_CAPACITY_TARGET: usize = 262144;

/// a buf is only frozen after its length exceeds this value,
/// so at most 20% of its capacity is wasted
const BUF_CAPACITY_ALMOST_FULL: usize = BUF_CAPACITY_TARGET * 4 / 5;

/// an append-only arena that stores byte slices in a list of large bufs
/// and refers to them with compact u64 addresses
pub struct BytesArena {
    /// all bufs; the last one is the current (writable) buf
    bufs: Vec<Vec<u8>>,
    /// total capacity of all frozen (non-last) bufs
    bufs_frozen_mem_size: usize,
}

impl Default for BytesArena {
fn default() -> Self {
// does not pre-allocate memory for first buf
Self {
bufs: vec![Vec::with_capacity(0)],
bufs_frozen_mem_size: 0,
}
}
}

impl BytesArena {
    /// stores the given bytes into the arena and returns their address
    pub fn add(&mut self, bytes: &[u8]) -> BytesArenaAddr {
        // assume bytes.len() < 2^32
let cur_buf_len = self.cur_buf().len();
let len = bytes.len();
        // freeze the current buf if it is almost full and does not have
        // enough space for the given bytes
if cur_buf_len > BUF_CAPACITY_ALMOST_FULL && cur_buf_len + len > BUF_CAPACITY_TARGET {
self.freeze_cur_buf();
}
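
        // worked example (illustrative, not from the original source): with
        // BUF_CAPACITY_TARGET = 262144 and BUF_CAPACITY_ALMOST_FULL = 209715,
        // a buf holding 250000 bytes is frozen before an add of 20000 bytes
        // (250000 > 209715 and 250000 + 20000 > 262144), while an add of
        // 10000 bytes still lands in the same buf (250000 + 10000 <= 262144)
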
let id = self.bufs.len() - 1;
let offset = self.cur_buf().len();
self.cur_buf_mut().extend_from_slice(bytes);
BytesArenaAddr::new(id, offset, len)
}

    /// returns the byte slice stored at the given address
    pub fn get(&self, addr: BytesArenaAddr) -> &[u8] {
let unpacked = addr.unpack();
unsafe {
            // SAFETY: performance critical; addr is assumed to be valid
self.bufs
.get_unchecked(unpacked.id)
.get_unchecked(unpacked.offset..unpacked.offset + unpacked.len)
}
}

    /// drops all stored bytes and resets the arena to its initial state
    pub fn clear(&mut self) {
*self = Self::default();
}

    /// specialized for merging two parts in sort-exec.
    /// works like an IntoIterator, freeing the memory of visited items.
    pub fn specialized_get_and_drop_last(&mut self, addr: BytesArenaAddr) -> &[u8] {
let unpacked = addr.unpack();
        if unpacked.id > 0 && !self.bufs[unpacked.id - 1].is_empty() {
            // drop the previous buf, which has been fully visited
            // (note: bufs_frozen_mem_size is not reduced here, so mem_size()
            // may over-report after dropping)
            self.bufs[unpacked.id - 1].truncate(0);
            self.bufs[unpacked.id - 1].shrink_to_fit();
}
unsafe {
            // SAFETY: performance critical; addr is assumed to be valid
self.bufs
.get_unchecked(unpacked.id)
.get_unchecked(unpacked.offset..unpacked.offset + unpacked.len)
}
}

    /// tracked memory size: capacity of frozen bufs plus the current buf
    pub fn mem_size(&self) -> usize {
self.bufs_frozen_mem_size + self.cur_buf().capacity()
}

    fn cur_buf(&self) -> &Vec<u8> {
        self.bufs.last().unwrap() // there is always at least one buf
}

    fn cur_buf_mut(&mut self) -> &mut Vec<u8> {
        self.bufs.last_mut().unwrap() // there is always at least one buf
}

    /// freezes the current buf and starts a new one with the target capacity
    fn freeze_cur_buf(&mut self) {
let frozen_mem_size = self.cur_buf().capacity();
self.bufs_frozen_mem_size += frozen_mem_size;
self.bufs.push(Vec::with_capacity(BUF_CAPACITY_TARGET));
}
}

/// a compact address of a byte slice stored in a BytesArena:
/// the high 32 bits pack (buf id, offset) as id * BUF_CAPACITY_TARGET + offset,
/// the low 32 bits hold the slice length
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct BytesArenaAddr(u64);

impl BytesArenaAddr {
    pub fn new(id: usize, offset: usize, len: usize) -> Self {
        // assumes offset < BUF_CAPACITY_TARGET and len < 2^32
        Self(((id as u64 * BUF_CAPACITY_TARGET as u64 + offset as u64) << 32) | len as u64)
}

    pub fn unpack(self) -> UnpackedBytesArenaAddr {
        let id_offset = self.0 >> 32;
        let id = (id_offset / (BUF_CAPACITY_TARGET as u64)) as usize;
        let offset = (id_offset % (BUF_CAPACITY_TARGET as u64)) as usize;
        let len = (self.0 << 32 >> 32) as usize; // keep only the low 32 bits
UnpackedBytesArenaAddr { id, offset, len }
}
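
    // worked example (illustrative, not from the original source):
    // new(2, 100, 5) packs (2 * 262144 + 100) << 32 | 5, i.e. 524388 in the
    // high 32 bits and 5 in the low 32 bits; unpack recovers
    // id = 524388 / 262144 = 2, offset = 524388 % 262144 = 100, len = 5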
}

/// the decoded form of a BytesArenaAddr
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct UnpackedBytesArenaAddr {
pub id: usize,
pub offset: usize,
pub len: usize,
}
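
// a minimal usage sketch (added for illustration, not part of the original
// file): round-trips a few slices through the arena and checks the address
// packing described above
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn add_and_get_round_trip() {
        let mut arena = BytesArena::default();
        let addr1 = arena.add(b"hello");
        let addr2 = arena.add(b"world");
        assert_eq!(arena.get(addr1), b"hello");
        assert_eq!(arena.get(addr2), b"world");

        // clear() resets the arena; old addresses must not be used afterwards
        arena.clear();
        assert_eq!(arena.mem_size(), 0);
    }

    #[test]
    fn addr_pack_unpack() {
        // offset must stay below BUF_CAPACITY_TARGET and len below 2^32
        // for the packing to be lossless
        let addr = BytesArenaAddr::new(3, 12345, 678);
        let unpacked = addr.unpack();
        assert_eq!(unpacked.id, 3);
        assert_eq!(unpacked.offset, 12345);
        assert_eq!(unpacked.len, 678);
    }
}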