/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file relay/backend/token_allocator.cc
* \brief Token allocation classes for backend
*/
#include "token_allocator.h"
#include <tvm/tir/op.h>
#include <algorithm>
#include <limits>
namespace tvm {
namespace relay {
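
// Returns the number of bytes needed to hold the tensor described by the
// prototype token: the product of its static shape dimensions times the byte
// width of one element, rounded up to whole bytes.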
size_t TokenAllocator1D::GetMemorySize(StorageToken* prototype) {
  TensorType ttype = prototype->ttype;
  ICHECK(ttype.defined());
  size_t size = 1;
  for (IndexExpr dim : ttype->shape) {
    const int64_t* pval = tir::as_const_int(dim);
    ICHECK(pval != nullptr) << "Cannot allocate memory for symbolic tensor shape " << ttype->shape;
    ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape dimension " << *pval;
    size *= static_cast<size_t>(pval[0]);
  }
  size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
  return size;
}
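
// Tries to reuse a free 1D storage token for the requested prototype.
// Candidates are taken from the free list within the size window
// [size / match_range_, size * match_range_): first the smallest compatible
// block that is at least as large as the request, then, failing that, the
// largest compatible block that is smaller. Returns nullptr if no compatible
// block is found.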
StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
  // calculate the size
  size_t size = GetMemorySize(prototype);
  // search memory block in [size / match_range_, size * match_range_)
  if (match_range_ == 0) {
    return nullptr;
  }
  auto begin = free_.lower_bound(size / match_range_);
  auto mid = free_.lower_bound(size);
  auto end = free_.upper_bound(size * match_range_);
  // search for memory blocks larger than requested
  for (auto it = mid; it != end; ++it) {
    StorageToken* tok = it->second;
    if (!tok->is_compatible(*prototype)) continue;
    ICHECK_EQ(tok->ref_counter, 0);
    // Use exact matching strategy
    tok->max_bytes = std::max(size, tok->max_bytes);
    tok->ref_counter = prototype->ref_counter;
    // find an exact match, erase from map and return
    free_.erase(it);
    return tok;
  }
  // then search for memory blocks smaller than requested space
  for (auto it = mid; it != begin;) {
    --it;
    StorageToken* tok = it->second;
    if (!tok->is_compatible(*prototype)) continue;
    ICHECK_EQ(tok->ref_counter, 0);
    // Use exact matching strategy
    tok->max_bytes = std::max(size, tok->max_bytes);
    tok->ref_counter = prototype->ref_counter;
    // erase from map and return
    free_.erase(it);
    return tok;
  }
  return nullptr;
}
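
// Allocates a brand new 1D storage token: records its size in bytes and the
// assigned storage id, and keeps track of the token in data_.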
StorageToken* TokenAllocator1D::Alloc(StorageToken* prototype, int64_t storage_id) {
  size_t size = GetMemorySize(prototype);
  prototype->max_bytes = size;
  prototype->storage_id = storage_id;
  data_.push_back(prototype);
  return prototype;
}
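
// Returns the token to the free list once its reference count drops to zero,
// keyed by its current maximum size so later requests can match it.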
void TokenAllocator1D::CheckForRelease(StorageToken* tok) {
  ICHECK_GE(tok->storage_id, 0);
  ICHECK_GE(tok->ref_counter, 0);
  if (tok->ref_counter == 0) {
    free_.insert({tok->max_bytes, tok});
  }
}
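
// Tries to reuse a free 2D texture block for the requested prototype.
// Only blocks with the same dtype and memory scope are considered, and blocks
// whose width or height differ from the request by more than max_ratio are
// skipped. Among the remaining candidates, the block that needs the least
// growth is preferred, breaking ties by the least wasted area; the chosen
// block may be grown to fit the request. Returns nullptr if no suitable block
// exists.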
StorageToken* TokenAllocator2D::Request(StorageToken* prototype) {
  auto shape = GetSize2D(prototype);
  const int64_t max_ratio = 5;
  int64_t min_added_size_x = std::numeric_limits<int64_t>::max();
  int64_t min_added_size_y = std::numeric_limits<int64_t>::max();
  int64_t min_wasted_size_x = std::numeric_limits<int64_t>::max();
  int64_t min_wasted_size_y = std::numeric_limits<int64_t>::max();
  int64_t best_storage_id = -1;
  MemBlock new_mem;
  for (int64_t free_id : free_list_) {
    MemBlock& cached = blocks_[free_id];
    // Can only reuse texture 2d blocks of the same type
    if (cached.token_->ttype->dtype != prototype->ttype->dtype) {
      continue;
    }
    // Can only reuse texture 2d blocks of the same scope.
    // Reusing textures with a different memory scope may lead to accuracy
    // issues, because the data is packed differently for different memory
    // scopes.
    if (cached.token_->virtual_device->memory_scope != prototype->virtual_device->memory_scope) {
      continue;
    }
    // avoid reusing too small and too big textures
    if (shape.width / cached.x_ > max_ratio || cached.x_ / shape.width > max_ratio ||
        shape.height / cached.y_ > max_ratio || cached.y_ / shape.height > max_ratio) {
      continue;
    }
    int64_t new_width = std::max(cached.x_, shape.width);
    int64_t new_height = std::max(cached.y_, shape.height);
    int64_t added_size_x = new_width - cached.x_;
    int64_t added_size_y = new_height - cached.y_;
    int64_t wasted_size_x = new_width - shape.width;
    int64_t wasted_size_y = new_height - shape.height;
    // Prioritize minimization of added size first, then minimize
    // wasted size among blocks which would not require expansion
    if ((min_added_size_x > 0 && added_size_x < min_added_size_x) ||
        (min_added_size_y > 0 && added_size_y < min_added_size_y) ||
        (min_added_size_x == added_size_x && wasted_size_x < min_wasted_size_x) ||
        (min_added_size_y == added_size_y && wasted_size_y < min_wasted_size_y)) {
      min_added_size_x = added_size_x;
      min_added_size_y = added_size_y;
      min_wasted_size_x = wasted_size_x;
      min_wasted_size_y = wasted_size_y;
      best_storage_id = free_id;
      new_mem.x_ = new_width;
      new_mem.y_ = new_height;
    }
  }
  if (min_added_size_x == 0 && min_added_size_y == 0) {
    // use existing block
    free_list_.erase(best_storage_id);
    blocks_[best_storage_id].token_->ref_counter += prototype->ref_counter;
    return blocks_[best_storage_id].token_;
  } else if (min_added_size_x <= shape.width || min_added_size_y <= shape.height) {
    // Reset the reference counter of the now live token
    free_list_.erase(best_storage_id);
    new_mem.token_ = prototype;
    new_mem.token_->ref_counter += 1;
    new_mem.token_->storage_id = best_storage_id;
    blocks_[best_storage_id] = new_mem;
    return new_mem.token_;
  }
  return nullptr;
}
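
// Allocates a brand new 2D texture block of exactly the requested shape and
// registers it in blocks_ under the assigned storage id.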
StorageToken* TokenAllocator2D::Alloc(StorageToken* prototype, int64_t storage_id) {
  auto shape = GetSize2D(prototype);
  MemBlock block;
  block.x_ = shape.width;
  block.y_ = shape.height;
  prototype->storage_id = storage_id;
  block.token_ = prototype;
  blocks_[prototype->storage_id] = block;
  return prototype;
}
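
// Marks the token's texture block as reusable once its reference count drops
// to zero by adding its storage id to the free list.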
void TokenAllocator2D::CheckForRelease(StorageToken* tok) {
  ICHECK_GE(tok->storage_id, 0);
  ICHECK_GE(tok->ref_counter, 0);
  if (tok->ref_counter == 0) {
    free_list_.insert(tok->storage_id);
  }
}
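
// Computes the 2D texture shape for the prototype token: the static tensor
// shape is flattened into texture dimensions at the axis chosen by
// DefaultTextureLayoutSeparator for the token's memory scope.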
runtime::Texture2DShape<int64_t> TokenAllocator2D::GetSize2D(StorageToken* prototype) {
  TensorType ttype = prototype->ttype;
  ICHECK(ttype.defined());
  size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(),
                                                       prototype->virtual_device->memory_scope);
  // Lightweight adapter that exposes the constant tensor shape as int64_t
  // indexing for the flattening helper.
  struct Shape {
    const Array<PrimExpr>& shape;
    int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); }
  };
  return runtime::ApplyTexture2DFlattening<int64_t>(Shape{ttype->shape}, ttype->shape.size(), axis);
}
} // namespace relay
} // namespace tvm