/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file src/runtime/contrib/clml/clml_memory_planner.cc
* \brief Various memory planning methods.
*/
#ifdef TVM_GRAPH_EXECUTOR_CLML
#include "clml_memory_planner.h"

#include <limits>
#include <map>
#include <utility>

#include "clml_utils.h"
namespace tvm {
namespace runtime {
namespace contrib {

using namespace tvm::runtime::json;
using JSONGraphNode = tvm::runtime::json::JSONGraphNode;

/*!
 * \brief Release a node's storage once its reference count drops to zero.
 */
void FreeMemory(CachedLayer* layer, int nid) {
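  // storage_ref_map holds a per-node reference count; the backing on-chip segment
  // or DDR buffer is recycled only when that count drops to zero.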
LOG_MEM << "FreeMemory:" << nid;
if (layer->storage_ref_map.find(nid) != layer->storage_ref_map.end()) {
LOG_MEM << "Ref Cnt:" << layer->storage_ref_map[nid];
layer->storage_ref_map[nid]--;
    if (0 == layer->storage_ref_map[nid]) {
      LOG_MEM << "Ref Cnt Zero";
// Look into on-chip allocation
for (auto it = layer->on_chip_pool_alloc_info.begin();
it != layer->on_chip_pool_alloc_info.end(); it++) {
if (it->second == nid) {
LOG_MEM << "Free Segment:" << it->first << " Nid:" << nid;
layer->in_chip_total_free += layer->on_chip_pool_size[it->first];
layer->in_chip_total_alloc -= layer->on_chip_pool_size[it->first];
layer->on_chip_pool_alloc_info.erase(it->first);
return;
}
}
// Look into DDR allocation
if (layer->ddr_alloc_plan.find(nid) != layer->ddr_alloc_plan.end()) {
LOG_MEM << "Free DDR segment from local pool";
layer->ddr_storage_ref_map[layer->ddr_alloc_plan[nid]].second = false;
return;
}
LOG_MEM << "*** Not a managed memory buffer";
}
} else {
LOG_MEM << "Not in storage ref map :" << nid;
}
}

/*!
 * \brief Split a free on-chip segment and allocate the requested size from its
 * beginning (is_left) or its end.
 */
size_t PartitionAndAllocate(CachedLayer* layer, size_t segment_start, size_t size, bool is_left) {
LOG_MEM << "PartitionAndAllocate:" << segment_start << " Size:" << size
<< " Is Begin:" << is_left;
size_t segment_size = layer->on_chip_pool_size[segment_start];
size_t left_space = segment_size - size;
layer->in_chip_total_free -= size;
layer->in_chip_total_alloc += size;
if (is_left) {
// Start allocation
layer->on_chip_pool_size[segment_start] = size;
if (left_space) {
layer->on_chip_pool_size.insert({segment_start + size, left_space});
}
return segment_start;
} else {
// End allocation
if (left_space) {
layer->on_chip_pool_size[segment_start] = left_space;
}
layer->on_chip_pool_size.insert({segment_start + left_space, size});
return segment_start + left_space;
}
}
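
// Illustrative example (not executed anywhere; the numbers are assumptions):
// with a single free segment {start: 0, size: 1024} in on_chip_pool_size, a
// 256-byte request is carved as follows:
//   is_left = true  -> pool becomes {0: 256, 256: 768}, returns offset 0
//   is_left = false -> pool becomes {0: 768, 768: 256}, returns offset 768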

/*!
 * \brief Ping-pong allocation within the best-fit free segment.
 */
size_t PingPongAllocate(CachedLayer* layer, const std::map<size_t, size_t>& segments, size_t size) {
/*
* segments contains all free segments details (start, size) that can fit the requirement
* PingPong Allocation Strategy:
* Here we find the smallest segment among all.
* We allocate at begining or end of this segment based on the ping-pong flag.
* Ping-pong allocation helps to have largest possible free segment at center
* for most of the graphs.
*
*/
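  // Illustrative trace (assuming alloc_ping_pong starts at 0 and the same segment
  // remains the best fit across calls):
  //   call 1: flag flips to 1 -> allocate at the segment's start
  //   call 2: flag flips to 0 -> allocate at the segment's end
  // keeping the largest free hole near the middle of the pool.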
  // Pick the smallest free segment that can hold the request (best fit).
  // The caller guarantees that `segments` is non-empty.
  ssize_t free_start = 0;
  ssize_t free_size = 0;
  ssize_t last_found_size = CLMLWorkspace::Global()->onchip_mem_size + 1;
for (auto it = segments.begin(); it != segments.end(); it++) {
if (it->second < last_found_size) {
free_start = it->first;
free_size = it->second;
last_found_size = it->second;
LOG_MEM << "Mem Found:" << free_start << " Size:" << free_size;
}
}
LOG_MEM << "Alloc On-chip Mem:" << free_start << " Size:" << free_size
<< " PingPong:" << layer->alloc_ping_pong;
// Allocate on-chip memory
layer->alloc_ping_pong ^= 1;
return PartitionAndAllocate(layer, free_start, size, layer->alloc_ping_pong);
}

/*!
 * \brief Request on-chip memory of the given size.
 * \return The segment offset on success, or static_cast<size_t>(-1) when no
 * suitable free segment exists.
 */
size_t RequestOnChipMemory(CachedLayer* layer, size_t size) {
LOG_MEM << "Request On-Chip Mem:" << size;
  // Coalesce adjacent free segments to reduce fragmentation.
bool any_merge = true;
while (any_merge) {
any_merge = false;
for (auto it = layer->on_chip_pool_size.begin(); it != layer->on_chip_pool_size.end(); it++) {
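      // Merge only when this segment and the one starting right after it are both
      // free and the neighbour still lies inside the on-chip pool.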
if ((layer->on_chip_pool_alloc_info.find(it->first) ==
layer->on_chip_pool_alloc_info.end()) &&
(layer->on_chip_pool_alloc_info.find(it->first + it->second) ==
layer->on_chip_pool_alloc_info.end()) &&
(it->first + it->second < CLMLWorkspace::Global()->onchip_mem_size)) {
size_t left_begin = it->first;
size_t left_size = it->second;
size_t right_size = layer->on_chip_pool_size[it->first + it->second];
LOG_MEM << "Merge:" << left_begin << " Size:" << left_size << " with :" << right_size;
layer->on_chip_pool_size[left_begin] = left_size + right_size;
layer->on_chip_pool_size.erase(left_begin + left_size);
any_merge = true;
break;
}
}
}
  // Collect all free segments large enough for the request.
std::map<size_t, size_t> feasible_segments;
for (auto it = layer->on_chip_pool_size.begin(); it != layer->on_chip_pool_size.end(); it++) {
if (layer->on_chip_pool_alloc_info.find(it->first) == layer->on_chip_pool_alloc_info.end()) {
if (it->second >= size) {
LOG_MEM << "Mem Pool:" << it->first << " - " << it->first + it->second << ":" << it->second
<< " - Free";
feasible_segments.insert({it->first, it->second});
} else {
LOG_MEM << "Mem Pool:" << it->first << " - " << it->first + it->second << ":" << it->second
<< " - Doesn't fit";
}
} else {
LOG_MEM << "Mem Pool:" << it->first << " - " << it->first + it->second << ":" << it->second
<< " - Busy";
}
}
if (0 == feasible_segments.size()) {
LOG_MEM << "No Suitable Mem Found:" << size << " Free Size:" << layer->in_chip_total_free;
if (size <= layer->in_chip_total_free) {
LOG_STATS << "*** ALERT ***: Couldn't allocate due to fragmentation:" << size
<< " Total Free:" << layer->in_chip_total_free;
layer->on_chip_alert_fail += size;
}
return -1;
}
return PingPongAllocate(layer, feasible_segments, size);
}
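
// Usage sketch (illustrative only; the planner in the CLML runtime is assumed to
// follow this pattern, with `layer` and `nbytes` standing in for real arguments):
//
//   size_t offset = RequestOnChipMemory(layer, nbytes);
//   if (offset == static_cast<size_t>(-1)) {
//     // On-chip pool exhausted or too fragmented: fall back to a DDR buffer.
//     cl_mem ddr_buf = RequestDDRMemory(layer, nbytes);
//   }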

/*!
 * \brief Allocate (or reuse) a DDR buffer of at least the requested size.
 */
cl_mem RequestDDRMemory(CachedLayer* layer, size_t size) {
  // Look in the layer's local storage map for a best-fit free buffer.
  auto cws = CLMLWorkspace::Global();
  cl_mem memptr = nullptr;
  size_t best_fit = std::numeric_limits<size_t>::max();
for (auto it = layer->ddr_storage_ref_map.begin(); it != layer->ddr_storage_ref_map.end(); it++) {
if ((it->second.first >= size) && (false == it->second.second)) {
if (best_fit > it->second.first) {
memptr = it->first;
best_fit = it->second.first;
}
}
}
if (memptr) {
LOG_MEM << "Reuse from local pool";
layer->ddr_storage_ref_map[memptr].second = true;
return memptr;
}
  // No free buffer in the local pool; search the global pool.
for (auto it = cws->ddr_global_pool.begin(); it != cws->ddr_global_pool.end(); it++) {
if ((it->second.first >= size) &&
(layer->ddr_storage_ref_map.find(it->first) == layer->ddr_storage_ref_map.end())) {
      // Candidate buffer in the global pool not yet tracked by this layer;
      // it is inserted into the local pool after the best fit is chosen.
if (best_fit > it->second.first) {
memptr = it->first;
best_fit = it->second.first;
}
}
}
if (memptr) {
LOG_MEM << "Reuse from global pool";
cws->ddr_global_pool[memptr].second += 1;
layer->ddr_storage_ref_map.insert(
{memptr, std::make_pair(cws->ddr_global_pool[memptr].first, true)});
return memptr;
}
// Allocate a fresh buffer in global then use in local pool.
LOG_MEM << "Allocating fresh buffer in global pool";
memptr = AllocateDDRTensorMemory(size);
cws->ddr_global_pool.insert({memptr, std::make_pair(size, 1)});
layer->ddr_storage_ref_map.insert({memptr, std::make_pair(size, true)});
return memptr;
}

/*!
 * \brief Decrement a DDR buffer's global reference count and release it once unused.
 */
void ReleaseDDRMemory(cl_mem memptr) {
  cl_int result;
  auto cws = CLMLWorkspace::Global();
  ICHECK(cws->ddr_global_pool.find(memptr) != cws->ddr_global_pool.end())
      << "Buffer is not managed by the global DDR pool";
  cws->ddr_global_pool[memptr].second -= 1;
if (0 == cws->ddr_global_pool[memptr].second) {
LOG_MEM << "Release DDR mem from global pool";
result = clReleaseMemObject(memptr);
ICHECK(result == CL_SUCCESS) << "clReleaseMemObject:" << result;
cws->ddr_global_pool.erase(memptr);
}
}
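
// Teardown sketch (assumed caller behaviour, not part of this file): each buffer a
// layer keeps in ddr_storage_ref_map is expected to be handed back exactly once,
// undoing the global-pool reference taken in RequestDDRMemory():
//
//   for (auto& it : layer->ddr_storage_ref_map) {
//     ReleaseDDRMemory(it.first);
//   }
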
} // namespace contrib
} // namespace runtime
} // namespace tvm
#endif