/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef DBCOMMON_SRC_DBCOMMON_UTILS_FLAT_MEMORY_BUFFER_H_
#define DBCOMMON_SRC_DBCOMMON_UTILS_FLAT_MEMORY_BUFFER_H_
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <utility>
#include <vector>

#include "dbcommon/utils/cutils.h"
#include "dbcommon/utils/int-util.h"
#include "dbcommon/utils/macro.h"
namespace dbcommon {
// A FlatMemBuf helps to avoid out-of-memory failures when acquiring a large
// chunk of memory, and reduces needless realloc calls. Users can treat it as
// a flat array of elements.
//
// FlatMemBuf works like a single-level page table: the second level is a set
// of memory blocks (block size specified by the user) and the first level is
// an array of pointers to those memory blocks.
//
// In theory, accessing an element requires two memory accesses. However, when
// a FlatMemBuf is small only one memory block exists, so two different
// accessors are provided to exploit that case.
//
// Be familiar with the implementation of this class before changing it;
// otherwise it is bug-prone.
template <typename TYPE, std::size_t MEMBLKSIZE>
class FlatMemBuf {
public:
class NormalAccessor {
public:
explicit NormalAccessor(const FlatMemBuf &fmb)
: memBlkPtrList(fmb.memBlkPtrList) {}
force_inline TYPE *at(uint64_t index) {
return reinterpret_cast<TYPE *>(memBlkPtrList[index >> ShiftLen]) +
(index & Mask);
}
private:
char **const memBlkPtrList;
};
class QuickAccessor {
public:
explicit QuickAccessor(const FlatMemBuf &fmb) : memBlkPtr(fmb.memBlkPtr0) {}
force_inline TYPE *at(uint64_t index) {
return reinterpret_cast<TYPE *>(memBlkPtr) + index;
}
private:
char *const memBlkPtr;
};
FlatMemBuf() {
static_assert((MEMBLKSIZE & (MEMBLKSIZE - 1)) == 0, "");
static_assert(((sizeof(TYPE) & (sizeof(TYPE) - 1)) == 0),
"supposed sizeof(TYPE) to be power of two");
}
void reserve(uint64_t size) {
if (sizeof(TYPE) * size <= getMemUsed()) return;
auto filledBlockCount = sizeof(TYPE) * size / MEMBLKSIZE;
auto remainedMemSize = nextPowerOfTwo(sizeof(TYPE) * size % MEMBLKSIZE);
if (!memBlkList.empty() && filledBlockCount) {
if (filledBlockCount >= memBlkList.size() && lastBlkSize_ != MEMBLKSIZE) {
// enlarge previous block
auto blk = std::move(memBlkList.back());
memBlkPtrListVec.pop_back();
memBlkList.pop_back();
auto newBlk = allocate(blk.release(), MEMBLKSIZE);
memBlkPtrListVec.push_back(newBlk.first);
memBlkPtrList = &memBlkPtrListVec[0];
memBlkList.push_back(std::move(newBlk.second));
lastBlkSize_ = MEMBLKSIZE;
}
}
while (memBlkList.size() < filledBlockCount) {
auto newBlk = allocate(nullptr, MEMBLKSIZE);
memBlkPtrListVec.push_back(newBlk.first);
memBlkPtrList = &memBlkPtrListVec[0];
memBlkList.push_back(std::move(newBlk.second));
}
assert(memBlkPtrListVec.size() == memBlkList.size());
assert(memBlkList.size() >= filledBlockCount);
if (memBlkList.size() == (filledBlockCount + (remainedMemSize ? 1 : 0))) {
if (remainedMemSize && remainedMemSize < MEMBLKSIZE) {
// enlarge previous block
auto blk = std::move(memBlkList.back());
memBlkPtrListVec.pop_back();
memBlkList.pop_back();
auto newBlk = allocate(blk.release(), remainedMemSize);
memBlkPtrListVec.push_back(newBlk.first);
memBlkPtrList = &memBlkPtrListVec[0];
memBlkList.push_back(std::move(newBlk.second));
lastBlkSize_ = remainedMemSize;
}
} else {
// allocate new block
assert(remainedMemSize);
auto newBlk = allocate(nullptr, remainedMemSize);
memBlkPtrListVec.push_back(newBlk.first);
memBlkPtrList = &memBlkPtrListVec[0];
memBlkList.push_back(std::move(newBlk.second));
lastBlkSize_ = remainedMemSize;
}
if (!memBlkPtrListVec.empty()) {
memBlkPtr0 = memBlkPtrListVec[0];
}
}
force_inline void resize(uint64_t size) {
reserve(size);
size_ = size;
}
force_inline uint64_t size() { return size_; }
force_inline TYPE dataAt(uint64_t index) { return *ptrAt(index); }
force_inline TYPE *getPtrAtBlk0() {
return reinterpret_cast<TYPE *>(memBlkPtr0);
}
force_inline void **getPtrList() {
return reinterpret_cast<void **>(memBlkPtrList);
}
// todo Refactor this function to c++ style
void fetch(uint64_t size, uint64_t mask, uint64_t *idx, TYPE *ret) {
if (size_ <= BlkSize) {
// performance: reduce memory access for memBlkPtr0
TYPE *tmpPtr = reinterpret_cast<TYPE *>(memBlkPtr0);
for (uint64_t i = 0; i < size; i++) ret[i] = tmpPtr[idx[i] & mask];
} else {
for (uint64_t i = 0; i < size; i++) ret[i] = *ptrAt(idx[i] & mask);
}
}
force_inline void push_back(const TYPE &data) {
reserve(size_ + 1);
size_++;
*ptrAt(size_ - 1) = data;
}
// Set the front elements in range of [0, size) to zero.
void memZero(uint64_t size) {
assert(size <= size_);
uint64_t idx = 0;
while (size >= BlkSize) {
memset(memBlkPtrListVec[idx], 0, MEMBLKSIZE);
idx++;
size -= BlkSize;
}
if (size > 0) memset(memBlkPtrListVec[idx], 0, size * sizeof(TYPE));
}
// Initialize [begin, end) to zero.
void memZero(uint64_t begin, uint64_t end) {
assert(begin <= end && end <= size_);
uint64_t beginBlkIdx = begin / BlkSize;
uint64_t endBlkIdx = end / BlkSize;
// Specialized initialization for single block
if (beginBlkIdx == endBlkIdx) {
memset(memBlkPtrListVec[beginBlkIdx] + (begin % BlkSize) * sizeof(TYPE),
0, (end - begin) * sizeof(TYPE));
return;
}
// Initialize leftmost leftover
memset(memBlkPtrListVec[beginBlkIdx] + (begin % BlkSize) * sizeof(TYPE), 0,
MEMBLKSIZE - (begin % BlkSize) * sizeof(TYPE));
// Initialize intermediate blocks
beginBlkIdx++;
while (beginBlkIdx < endBlkIdx) {
memset(memBlkPtrListVec[beginBlkIdx], 0, MEMBLKSIZE);
beginBlkIdx++;
}
// Initialize rightmost leftover
if (end % BlkSize > 0)
memset(memBlkPtrListVec[endBlkIdx], 0, (end % BlkSize) * sizeof(TYPE));
}
force_inline TYPE *ptrAt(uint64_t index) {
assert(index < size_);
return reinterpret_cast<TYPE *>(memBlkPtrList[index >> ShiftLen]) +
(index & Mask);
}
force_inline TYPE *ptrAtQuick(uint64_t index) {
assert(index < BlkSize);
return reinterpret_cast<TYPE *>(memBlkPtr0) + index;
}
bool enableQuickAccess() { return size_ <= BlkSize; }
static const uint64_t BlkSize = MEMBLKSIZE / sizeof(TYPE);
double getMemUsed() {
double ret = 0;
ret += memBlkList.size() > 1 ? MEMBLKSIZE * (memBlkList.size() - 1) : 0;
ret += lastBlkSize_;
assert(ret >= sizeof(TYPE) * size());
return ret;
}
void reset() {
lastBlkSize_ = 0;
memBlkList.clear();
memBlkPtrListVec.clear();
memBlkPtrList = nullptr;
memBlkPtr0 = nullptr;
size_ = 0;
}
private:
std::pair<char *, MemBlkOwner> allocate(char *ptr, size_t size) {
// Exactly, each MemBlkOwner contains extra spare memory beyond the
// specified MEMBLKSIZE, in order to align memory address into the
// multiple of the cache line size, which helps to reduce cache miss when
// size_ is small.
// For example, the number of the groups in TPCH-Q1 is 4 and
// sizeof(AggGroupValue) is 16. There is only 64 byte memory keeping being
// accessed, which fits into one cache line perfectly.
// TODO(chiyang): add back the alignment of cache line size
MemBlkOwner tmpOwner(MemBlkOwner({(cnrealloc(ptr, size)), cnfree}));
char *tmpPtr = tmpOwner.get();
return std::make_pair(tmpPtr, std::move(tmpOwner));
}
// the maximum number of the elements that contained in a memory block
static const uint64_t Mask = BlkSize - 1;
static const uint64_t ShiftLen = 64 - __builtin_clzll(Mask);
uint64_t lastBlkSize_ = 0;
std::vector<MemBlkOwner> memBlkList;
std::vector<char *> memBlkPtrListVec;
char **memBlkPtrList = nullptr;
char *memBlkPtr0 = nullptr; // performance: for small scale
uint64_t size_ = 0; // number of stored unit type
};
} // namespace dbcommon
#endif // DBCOMMON_SRC_DBCOMMON_UTILS_FLAT_MEMORY_BUFFER_H_