/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file tvm/runtime/memory/memory_manager.cc
 * \brief Allocate and manage memory for the runtime.
 */
#include <tvm/ffi/function.h>
#include <tvm/ffi/reflection/registry.h>
#include <tvm/runtime/memory/memory_manager.h>

#include <memory>
#include <utility>

#include "naive_allocator.h"
#include "pooled_allocator.h"

namespace tvm {
namespace runtime {
namespace memory {

Storage::Storage(Buffer buffer, Allocator* allocator) {
  auto n = ffi::make_object<StorageObj>();
  n->buffer = std::move(buffer);
  n->allocator = allocator;
  data_ = std::move(n);
}

// Verify that the dtype can be backed by a flat allocation: at least one
// lane, a byte-aligned bit width (uint1 is special-cased as bool), and a
// power-of-two bit width.
inline void VerifyDataType(DLDataType dtype) {
  TVM_FFI_ICHECK_GE(dtype.lanes, 1);
  if (dtype.code == kDLFloat) {
    TVM_FFI_ICHECK_EQ(dtype.bits % 8, 0);
  } else {
    // Allow uint1 as a special flag for bool.
    if (dtype.bits == 1 && dtype.code == kDLUInt) return;
    TVM_FFI_ICHECK_EQ(dtype.bits % 8, 0);
  }
  // The bit width must be a power of two.
  TVM_FFI_ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
}

// Return the natural alignment of one (possibly vectorized) element,
// clamped up to the minimum allocation alignment.
inline size_t GetDataAlignment(const DLDataType& dtype) {
  size_t align = dtype.lanes * dtype.bits / 8;
  if (align < kAllocAlignment) return kAllocAlignment;
  return align;
}

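// Worked example (illustrative only): a float32x4 element has a natural
// alignment of 4 * 32 / 8 = 16 bytes, which is returned whenever it exceeds
// kAllocAlignment; a scalar int8 computes 1 * 8 / 8 = 1 byte and is clamped
// up to kAllocAlignment.
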
Tensor StorageObj::AllocTensorScoped(int64_t offset, ffi::Shape shape, DLDataType dtype,
                                     ffi::String scope) {
  if (scope == "global" || scope.empty()) {
    return AllocTensor(offset, shape, dtype);
  }
  VerifyDataType(dtype);

  // Allocation helper that carves a scope-specific view out of the underlying
  // storage and keeps the Storage alive for the tensor's lifetime.
  struct StorageScopedAlloc {
   public:
    explicit StorageScopedAlloc(Storage storage) : storage_(storage) {}
    void AllocData(DLTensor* tensor, const ffi::Shape& shape, const ffi::String& scope,
                   int64_t byte_offset) {
      tensor->data = storage_->allocator->CreateView(storage_->buffer, shape, tensor->dtype, scope);
      tensor->byte_offset = byte_offset;
    }
    void FreeData(DLTensor* tensor) { storage_->allocator->FreeView(tensor->device, tensor->data); }

   private:
    Storage storage_;
  };

  size_t needed_size = ffi::GetDataSize(shape.Product(), dtype);
  TVM_FFI_ICHECK(offset + needed_size <= this->buffer.size)
      << "Storage allocation failure: attempted to allocate " << needed_size << " bytes at offset "
      << offset << " in a region that is " << this->buffer.size << " bytes";
  return Tensor::FromNDAlloc(StorageScopedAlloc(ffi::GetRef<Storage>(this)), shape, dtype,
                             this->buffer.device, shape, scope, offset);
}

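// Usage sketch for AllocTensorScoped (illustrative, not compiled here; the
// storage handle, the shape, and the "global.texture" scope string are
// assumptions for a device whose allocator implements CreateView):
//
//   Storage storage = ...;  // backed by an allocator that supports views
//   Tensor view = storage->AllocTensorScoped(/*offset=*/0, ffi::Shape({64, 64, 4}),
//                                            DLDataType{kDLFloat, 32, 1}, "global.texture");
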
Tensor StorageObj::AllocTensor(int64_t offset, ffi::Shape shape, DLDataType dtype) {
  VerifyDataType(dtype);
  size_t needed_size = ffi::GetDataSize(shape.Product(), dtype);
  TVM_FFI_ICHECK(offset + needed_size <= this->buffer.size)
      << "Storage allocation failure: attempted to allocate " << needed_size << " bytes at offset "
      << offset << " in a region that is " << this->buffer.size << " bytes";

  // Allocation helper that aliases the storage's flat buffer and keeps the
  // Storage alive for the tensor's lifetime.
  class StorageAlloc {
   public:
    explicit StorageAlloc(Storage storage) : storage_(storage) {}
    void AllocData(DLTensor* tensor, int64_t offset) {
      if (storage_->buffer.device.device_type == kDLHexagon) {
        // On Hexagon, a non-zero offset is supported by advancing the data
        // pointer itself; byte_offset must remain zero.
        tensor->data = static_cast<uint8_t*>(storage_->buffer.data) + offset;
        tensor->byte_offset = 0;
      } else {
        tensor->data = storage_->buffer.data;
        tensor->byte_offset = offset;
      }
    }
    // No-op: the underlying buffer is owned and freed by the Storage object.
    void FreeData(DLTensor* tensor) {}

   private:
    Storage storage_;
  };
  return Tensor::FromNDAlloc(StorageAlloc(ffi::GetRef<Storage>(this)), shape, dtype,
                             this->buffer.device, offset);
}

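// Worked example (values are illustrative): for offset = 256, on Hexagon the
// returned tensor gets data = buffer.data + 256 and byte_offset = 0, while on
// other devices it gets data = buffer.data and byte_offset = 256; both views
// address the same bytes of the storage region.
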
MemoryManager* MemoryManager::Global() {
  // NOTE: explicitly use new to avoid exit-time destruction of global state.
  // The global state will be recycled by the OS when the process exits.
  static auto* inst = new MemoryManager();
  return inst;
}

std::string DeviceTypeStr(DLDeviceType type) {
  switch (type) {
    case kDLOpenCL:
      return "opencl";
    case kDLVulkan:
      return "vulkan";
    default:
      return "";
  }
}

Allocator* GetDeviceSpecificAllocator(Device dev, AllocatorType type) {
  // A backend may register its own allocator factory under the global
  // function name "DeviceAllocator.<device>"; fall back to the generic
  // allocators when no such factory exists.
  std::string dev_str = DeviceTypeStr(dev.device_type);
  auto device_alloc_helper = tvm::ffi::Function::GetGlobal("DeviceAllocator." + dev_str);
  Allocator* allocator = nullptr;
  if (device_alloc_helper) {
    void* valloc = (*device_alloc_helper)(dev, static_cast<int>(type)).cast<void*>();
    allocator = static_cast<Allocator*>(valloc);
  }
  if (allocator == nullptr) {
    switch (type) {
      case kNaive: {
        VLOG(1) << "New naive allocator for " << dev;
        allocator = new NaiveAllocator();
        break;
      }
      case kPooled: {
        VLOG(1) << "New pooled allocator for " << dev;
        allocator = new PooledAllocator();
        break;
      }
      default:
        TVM_FFI_THROW(InternalError) << "Unknown allocator type: " << type;
    }
  }
  return allocator;
}

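// Sketch of how a backend could hook into the lookup above (illustrative; the
// lambda body and MyDeviceAllocator are hypothetical, while the
// "DeviceAllocator.<device>" name pattern and the (Device, int) -> void*
// calling convention follow GetDeviceSpecificAllocator):
//
//   TVM_FFI_STATIC_INIT_BLOCK() {
//     namespace refl = tvm::ffi::reflection;
//     refl::GlobalDef().def("DeviceAllocator.opencl", [](Device dev, int type) -> void* {
//       return static_cast<Allocator*>(new MyDeviceAllocator(dev));  // hypothetical type
//     });
//   }
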
Allocator* MemoryManager::GetOrCreateAllocator(Device dev, AllocatorType type) {
  MemoryManager* m = MemoryManager::Global();
  std::lock_guard<std::mutex> lock(m->mu_);
  auto& device_allocators = m->allocators_[dev];
  auto it = device_allocators.find(type);
  if (it == device_allocators.end()) {
    std::unique_ptr<Allocator> alloc(GetDeviceSpecificAllocator(dev, type));
    Allocator* ret = alloc.get();
    device_allocators.emplace(type, std::move(alloc));
    return ret;
  }
  return it->second.get();
}

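// Usage sketch for GetOrCreateAllocator (illustrative; the Device literal and
// the byte count are assumptions):
//
//   Device dev{kDLCPU, 0};
//   Allocator* pooled = MemoryManager::GetOrCreateAllocator(dev, kPooled);
//   Buffer buf = pooled->Alloc(dev, /*nbytes=*/1024, kAllocAlignment,
//                              DLDataType{kDLFloat, 32, 1});
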
Allocator* MemoryManager::GetAllocator(Device dev, AllocatorType type) {
  MemoryManager* m = MemoryManager::Global();
  std::lock_guard<std::mutex> lock(m->mu_);
  auto it = m->allocators_.find(dev);
  if (it == m->allocators_.end()) {
    TVM_FFI_THROW(InternalError) << "Allocator for " << dev << " has not been created yet.";
  }
  if (it->second.find(type) == it->second.end()) {
    TVM_FFI_THROW(InternalError) << "Allocator for " << dev << " of type " << type
                                 << " has not been created yet.";
  }
  return it->second.at(type).get();
}

void MemoryManager::Clear() {
  MemoryManager* m = MemoryManager::Global();
  std::lock_guard<std::mutex> lock(m->mu_);
  for (const auto& [device, allocators] : m->allocators_) {
    for (const auto& [allocator_type, allocator] : allocators) {
      allocator->Clear();
    }
  }
}

Tensor Allocator::Empty(ffi::Shape shape, DLDataType dtype, DLDevice dev,
                        ffi::Optional<ffi::String> mem_scope) {
  VerifyDataType(dtype);

  // Allocation helper that returns the buffer to its originating allocator
  // when the tensor is destructed.
  class BufferAlloc {
   public:
    explicit BufferAlloc(Buffer buffer) : buffer_(buffer) {}
    void AllocData(DLTensor* tensor) { tensor->data = buffer_.data; }
    void FreeData(DLTensor* tensor) {
      MemoryManager::GetAllocator(buffer_.device, buffer_.alloc_type)->Free(buffer_);
    }

   private:
    Buffer buffer_;
  };

  size_t alignment = GetDataAlignment(dtype);
  size_t size = ffi::GetDataSize(shape.Product(), dtype);
  Buffer buffer;
  if (!mem_scope.has_value() || (*mem_scope).empty() || (*mem_scope) == "global") {
    buffer = this->Alloc(dev, size, alignment, dtype);
  } else {
    buffer = this->Alloc(dev, shape, dtype, *mem_scope);
  }
  return Tensor::FromNDAlloc(BufferAlloc(buffer), shape, dtype, dev);
}

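// Usage sketch for Empty (illustrative; assumes ffi::Optional<ffi::String>
// default-constructs to "no scope", and reuses the `pooled` allocator and
// `dev` from the sketch above):
//
//   Tensor t = pooled->Empty(ffi::Shape({2, 3}), DLDataType{kDLFloat, 32, 1},
//                            dev, ffi::Optional<ffi::String>());
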
bool Allocator::AllowMemoryScope(const std::string& mem_scope) const {
  return mem_scope.empty() || mem_scope == "global";
}

Buffer Allocator::Alloc(Device dev, ffi::Shape shape, DLDataType type_hint,
                        const std::string& mem_scope) {
  if (AllowMemoryScope(mem_scope)) {
    // By default, we can always redirect to the flat memory allocation.
    size_t alignment = GetDataAlignment(type_hint);
    size_t size = ffi::GetDataSize(shape.Product(), type_hint);
    return Alloc(dev, size, alignment, type_hint);
  }
  TVM_FFI_THROW(InternalError) << "Allocator cannot allocate data space with "
                               << "the specified memory scope: " << mem_scope;
  return {};  // unreachable; kept to satisfy compilers that require a return value
}

void Allocator::Clear() {
  // Default no-op: the naive allocator needs no explicit clearing.
  // The pooled allocator overrides this method to release its pools.
}

TVM_FFI_STATIC_INIT_BLOCK() {
  namespace refl = tvm::ffi::reflection;
  refl::GlobalDef().def("vm.builtin.memory_manager.clear", MemoryManager::Clear);
}

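// Usage sketch: the registration above makes Clear reachable through the
// global function table, e.g. from C++ (the lookup pattern mirrors the one
// used in GetDeviceSpecificAllocator above):
//
//   if (auto clear_fn = tvm::ffi::Function::GetGlobal("vm.builtin.memory_manager.clear")) {
//     (*clear_fn)();  // releases memory cached by every registered allocator
//   }
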
} // namespace memory
} // namespace runtime
} // namespace tvm