/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file src/runtime/contrib/arm_compute_lib/acl_runtime.cc
* \brief A simple JSON runtime for Arm Compute Library.
*/
#include <tvm/ffi/function.h>
#include <tvm/ffi/reflection/registry.h>
#include <tvm/runtime/tensor.h>
#include "../json/json_node.h"
#include "../json/json_runtime.h"
#ifdef TVM_GRAPH_EXECUTOR_ARM_COMPUTE_LIB
#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/NEON/functions/NEArithmeticAddition.h>
#include <arm_compute/runtime/NEON/functions/NEConcatenateLayer.h>
#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
#include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h>
#include <arm_compute/runtime/NEON/functions/NEElementwiseOperations.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
#include "acl_allocator.h"
#include "acl_utils.h"
#endif
namespace tvm {
namespace runtime {
namespace contrib {
using namespace tvm::runtime::json;
class ACLRuntime : public JSONRuntimeBase {
public:
/*!
* \brief The ACL runtime module. Deserialize the provided functions
* on creation and store in the layer cache.
*
* \param symbol_name The name of the function.
* \param graph_json The serialized JSON representation of a sub-graph.
* \param const_names The names of each constant in the sub-graph.
*/
explicit ACLRuntime(const std::string& symbol_name, const std::string& graph_json,
const ffi::Array<ffi::String>& const_names)
: JSONRuntimeBase(symbol_name, graph_json, const_names) {}
/*!
* \brief The type key of the module.
*
* \return module type key.
*/
const char* kind() const override { return "arm_compute_lib"; }
/*!
* \brief Initialize runtime. Create ACL layer from JSON
* representation.
*
* \param consts The constant params from compiled model.
*/
void Init(const ffi::Array<Tensor>& consts) override {
ICHECK_EQ(consts.size(), const_idx_.size())
<< "The number of input constants must match the number required.";
SetupConstants(consts);
BuildEngine();
}
#ifdef TVM_GRAPH_EXECUTOR_ARM_COMPUTE_LIB
/*!
* \brief Unpack inputs and outputs and run inference on the cached layer.
*/
void Run() override {
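// Bind the current TVM input buffers to the cached ACL tensors.
// import_memory wraps the existing buffer rather than copying, so this is
// repeated on every call in case the data entries have been rebound.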
for (size_t nid_idx = 0; nid_idx < input_nodes_.size(); ++nid_idx) {
auto nid = input_nodes_[nid_idx];
if (nodes_[nid].GetOpType() == "input") {
for (uint32_t eid_idx = 0; eid_idx < nodes_[nid].GetNumOutput(); eid_idx++) {
uint32_t eid = EntryID(nid, eid_idx);
void* data = data_entry_[eid]->data;
auto key = std::pair<uint32_t, uint32_t>(nid, eid_idx);
if (layer_.json_inputid_to_layer_inputid.count(key) > 0) {
CheckACLError(
layer_.inputs[layer_.json_inputid_to_layer_inputid[key]].allocator()->import_memory(
data));
} else {
CheckACLError(layer_.inputs[nid_idx].allocator()->import_memory(data));
}
}
}
}
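// Likewise import the TVM output buffers so ACL writes results in place.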
for (size_t i = 0; i < outputs_.size(); ++i) {
uint32_t eid = EntryID(outputs_[i]);
void* data = data_entry_[eid]->data;
CheckACLError(layer_.outputs[i].allocator()->import_memory(data));
}
this->layer_.function->run();
}
private:
/*!
* \brief Build ACL layer from JSON representation and cache.
*
* \note For the time being, only one layer or operator is supported
* per engine.
*/
void BuildEngine() {
std::shared_ptr<arm_compute::MemoryManagerOnDemand> mm = MakeACLMemoryManager();
int num_pools = 0;
bool found_kernel_node = false;
for (size_t nid = 0; nid < nodes_.size(); ++nid) {
const auto& node = nodes_[nid];
if (found_kernel_node) {
LOG(FATAL)
<< "Arm Compute Library runtime module only supports one kernel node per function.";
}
if (node.GetOpType() == "kernel") {
found_kernel_node = true;
auto op_name = node.GetOpName();
if ("nn.conv2d" == op_name || "qnn.conv2d" == op_name) {
CreateConvolution2DLayer(&layer_, node, mm);
num_pools++;
} else if ("nn.depthwise_conv2d" == op_name || "qnn.depthwise_conv2d" == op_name) {
CreateDepthwiseConvolution2DLayer(&layer_, node, mm);
num_pools++;
} else if ("nn.dense" == op_name || "qnn.dense" == op_name) {
CreateFullyConnectedLayer(&layer_, node, mm);
num_pools++;
} else if ("nn.max_pool2d" == op_name || "nn.avg_pool2d" == op_name ||
"nn.l2_pool2d" == op_name) {
CreatePoolingLayer(&layer_, node);
} else if ("nn.global_max_pool2d" == op_name || "nn.global_avg_pool2d" == op_name) {
CreateGlobalPoolingLayer(&layer_, node);
} else if ("reshape" == op_name) {
CreateReshapeLayer(&layer_, node);
} else if ("maximum" == op_name) {
CreateMaximumLayer(&layer_, node);
} else if ("add" == op_name || "qnn.add" == op_name) {
CreateAddLayer(&layer_, node);
} else if ("concatenate" == op_name) {
CreateConcatenateLayer(&layer_, node);
} else {
LOG(FATAL) << "Unsupported op: " << op_name;
}
}
}
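// Finalize the function (e.g. pre-transforming weights) and, if any layer
// requested working memory, back the memory manager's pools with
// TVM-managed allocations via allocator_.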
this->layer_.function->prepare();
if (num_pools > 0) mm->populate(this->allocator_, num_pools);
}
/*!
* \brief ACL objects we cache in order to avoid needing to construct
* a new layer each time.
*/
struct CachedLayer {
std::shared_ptr<arm_compute::IFunction> function;
std::vector<arm_compute::Tensor> inputs;
std::vector<arm_compute::Tensor> outputs;
// Maps the input index of the JSON node to the index of the ACL layer's inputs.
// This is optional (i.e. only populated when an operator indexes inputs by
// entry id, as concatenate does).
std::map<std::pair<uint32_t, uint32_t>, uint32_t> json_inputid_to_layer_inputid;
};
/*!
* \brief Create an ACL tensor given the JSON representation. If scale
* and offset are given, then create a quantized ACL tensor.
*
* \param tensor The tensor to represent.
* \param scale (Optional) The scale of the tensor as an input.
* \param offset (Optional) The offset of the tensor as an input.
* \param apply_dim_correction (Optional) Flag stating whether to apply dimension correction after
* setting one dimension. E.g. when permuting NCHW -> NHWC, 1x1x2 would become 2x1x1, but
* _num_dimensions should be 3 rather than 1.
* \param increase_dim_unit (Optional) Set to true if new unit dimensions increase the number of
* dimensions of the shape.
* \return ACL Tensor.
*/
arm_compute::Tensor MakeACLTensorFromJSONEntry(const JSONGraphNodeEntry& tensor,
JSONGraphNodeEntry* scale = nullptr,
JSONGraphNodeEntry* offset = nullptr,
bool apply_dim_correction = true,
bool increase_dim_unit = true) {
JSONGraphNode node = nodes_[tensor.id_];
void* node_data = nullptr;
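// Constant data is bound at engine build time; non-constant inputs are
// imported later in Run() once their buffers are known.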
if (node.GetOpType() == "const") {
node_data = data_entry_[EntryID(tensor)]->data;
}
return MakeACLTensorFromJSONNode(node, scale, offset, node_data, apply_dim_correction,
increase_dim_unit, tensor.index_);
}
/*!
* \brief Create an ACL tensor given the JSON representation. If scale
* and offset are given, then create a quantized ACL tensor.
*
* \param node The tensor to represent.
* \param scale (Optional) The scale of the tensor as an input.
* \param offset (Optional) The offset of the tensor as an input.
* \param data (Optional) Constant data of the input node.
* \param apply_dim_correction (Optional) Flag stating whether to apply dimension correction after
* setting one dimension. E.g. when permuting NCHW -> NHWC, 1x1x2 would become 2x1x1, but
* _num_dimensions should be 3 rather than 1.
* \param increase_dim_unit (Optional) Set to true if new unit dimensions increase the number of
* dimensions of the shape.
* \param entry_index The entry index.
* \return ACL Tensor.
*/
arm_compute::Tensor MakeACLTensorFromJSONNode(
const JSONGraphNode& node, JSONGraphNodeEntry* scale = nullptr,
JSONGraphNodeEntry* offset = nullptr, void* data = nullptr, bool apply_dim_correction = true,
bool increase_dim_unit = true, uint32_t entry_index = 0) {
const DLTensor* scale_data = nullptr;
const DLTensor* offset_data = nullptr;
if (scale && offset) {
scale_data = data_entry_[EntryID(*scale)];
offset_data = data_entry_[EntryID(*offset)];
}
return MakeACLTensor(node, data, scale_data, offset_data, apply_dim_correction,
increase_dim_unit, entry_index);
}
/*!
* \brief Create a 2D convolution layer.
*
* \param layer The ACL layer to build, containing inputs, outputs and the ACL function.
* \param node The JSON representation of the operator.
* \param mm The memory manager through which the ACL conv2d layer can request auxiliary memory
* from TVM.
*/
void CreateConvolution2DLayer(CachedLayer* layer, const JSONGraphNode& node,
const std::shared_ptr<arm_compute::MemoryManagerOnDemand>& mm) {
std::vector<std::string> padding = node.GetAttr<std::vector<std::string>>("padding");
std::vector<std::string> strides = node.GetAttr<std::vector<std::string>>("strides");
std::vector<std::string> dilation = node.GetAttr<std::vector<std::string>>("dilation");
arm_compute::PadStrideInfo pad_stride_info = MakeACLPadStride(padding, strides);
int groups = std::stoi(node.GetAttr<std::vector<std::string>>("groups")[0]);
ICHECK(groups == 1) << "Arm Compute Library NEON convolution only supports group size of 1.";
arm_compute::ActivationLayerInfo act_info;
if (node.HasAttr("activation_type")) {
std::string activation_type = node.GetAttr<std::vector<std::string>>("activation_type")[0];
act_info = MakeACLActivationInfo(activation_type);
}
arm_compute::Size2D dilation_2d(std::stoi(dilation[0]), std::stoi(dilation[1]));
// Collect inputs and outputs, handling both nn.conv2d and qnn.conv2d cases.
std::vector<JSONGraphNodeEntry> inputs = node.GetInputs();
size_t num_inputs = inputs.size();
bool has_bias;
if (node.GetOpName() == "qnn.conv2d") {
ICHECK(num_inputs >= 8U && num_inputs <= 9U)
<< "Quantized convolution requires 9 inputs with a bias, 8 inputs without.";
has_bias = num_inputs == 9;
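// QNN input order as consumed below: data, weights, input zero point,
// kernel zero point, input scale, kernel scale, [bias], output scale,
// output zero point.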
layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[0], &inputs[4], &inputs[2]));
layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[1], &inputs[5], &inputs[3]));
if (has_bias) {
layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[6]));
}
layer->outputs.push_back(
MakeACLTensorFromJSONNode(node, &inputs[6 + has_bias], &inputs[7 + has_bias]));
} else {
ICHECK(num_inputs >= 2U && num_inputs <= 3U)
<< "Convolution requires 3 inputs with a bias, 2 inputs without.";
has_bias = num_inputs == 3;
for (const auto& i : inputs) {
layer->inputs.push_back(MakeACLTensorFromJSONEntry(i));
}
layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
}
auto function = std::make_shared<arm_compute::NEConvolutionLayer>(mm);
function->configure(&layer->inputs[0], &layer->inputs[1],
has_bias ? &layer->inputs[2] : nullptr, &layer->outputs[0], pad_stride_info,
arm_compute::WeightsInfo(), dilation_2d, act_info);
layer->function = function;
}
/*!
* \brief Create a 2D depthwise convolution layer.
*
* \param layer The ACL layer to build, containing inputs, outputs and the ACL function.
* \param node The JSON representation of the operator.
* \param mm The memory manager through which the ACL depthwise conv2d layer can request
* auxiliary memory from TVM.
*/
void CreateDepthwiseConvolution2DLayer(
CachedLayer* layer, const JSONGraphNode& node,
const std::shared_ptr<arm_compute::MemoryManagerOnDemand>& mm) {
std::vector<std::string> padding = node.GetAttr<std::vector<std::string>>("padding");
std::vector<std::string> strides = node.GetAttr<std::vector<std::string>>("strides");
std::vector<std::string> dilation = node.GetAttr<std::vector<std::string>>("dilation");
arm_compute::PadStrideInfo pad_stride_info = MakeACLPadStride(padding, strides);
arm_compute::ActivationLayerInfo act_info;
if (node.HasAttr("activation_type")) {
std::string activation_type = node.GetAttr<std::vector<std::string>>("activation_type")[0];
act_info = MakeACLActivationInfo(activation_type);
}
arm_compute::Size2D dilation_2d(std::stoi(dilation[0]), std::stoi(dilation[1]));
// Collect inputs and outputs, handling both nn.depthwise_conv2d and qnn.depthwise_conv2d cases.
std::vector<JSONGraphNodeEntry> inputs = node.GetInputs();
size_t num_inputs = inputs.size();
bool has_bias;
if (node.GetOpName() == "qnn.depthwise_conv2d") {
ICHECK(num_inputs >= 8U && num_inputs <= 9U)
<< "Quantized depthwise convolution requires 9 inputs with a bias, 8 inputs without.";
has_bias = num_inputs == 9;
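// Same QNN input order as qnn.conv2d: data, weights, zero points, scales,
// [bias], output scale, output zero point.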
layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[0], &inputs[4], &inputs[2]));
layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[1], &inputs[5], &inputs[3]));
if (has_bias) {
layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[6]));
}
layer->outputs.push_back(
MakeACLTensorFromJSONNode(node, &inputs[6 + has_bias], &inputs[7 + has_bias]));
} else {
ICHECK(num_inputs >= 2U && num_inputs <= 3U)
<< "Depthwise convolution requires 3 inputs with a bias, 2 inputs without.";
has_bias = num_inputs == 3;
for (const auto& i : inputs) {
layer->inputs.push_back(MakeACLTensorFromJSONEntry(i));
}
layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
}
// Depth multiplier is the final dimension in the ACL weights tensor (IWH*M*).
int depth_multiplier = layer->inputs[1].info()->tensor_shape()[3];
auto function = std::make_shared<arm_compute::NEDepthwiseConvolutionLayer>(mm);
function->configure(&layer->inputs[0], &layer->inputs[1],
has_bias ? &layer->inputs[2] : nullptr, &layer->outputs[0], pad_stride_info,
depth_multiplier, act_info, dilation_2d);
layer->function = function;
}
/*!
* \brief Create a fully connected (dense) layer.
*
* \param layer The ACL layer to build, containing inputs, outputs and the ACL function.
* \param node The JSON representation of the operator.
* \param mm The memory manager through which the ACL fully connected layer can request auxiliary
* memory from TVM.
*/
void CreateFullyConnectedLayer(CachedLayer* layer, const JSONGraphNode& node,
const std::shared_ptr<arm_compute::MemoryManagerOnDemand>& mm) {
arm_compute::FullyConnectedLayerInfo fc_info;
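// Record the layout the weights were trained in so ACL can re-arrange them
// for NHWC execution where necessary.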
fc_info.set_weights_trained_layout(arm_compute::DataLayout::NHWC);
// Collect inputs and outputs, handling both nn.dense and qnn.dense cases.
std::vector<JSONGraphNodeEntry> inputs = node.GetInputs();
size_t num_inputs = inputs.size();
bool has_bias;
if (node.GetOpName() == "qnn.dense") {
ICHECK(num_inputs >= 8U && num_inputs <= 9U)
<< "Quantized fully connected (dense) layer requires 9 inputs with a bias, 8 inputs "
"without.";
has_bias = num_inputs == 9;
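// Same QNN input order as qnn.conv2d: data, weights, zero points, scales,
// [bias], output scale, output zero point.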
layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[0], &inputs[4], &inputs[2]));
layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[1], &inputs[5], &inputs[3]));
if (has_bias) {
layer->inputs.push_back(MakeACLTensorFromJSONEntry(inputs[6]));
}
layer->outputs.push_back(
MakeACLTensorFromJSONNode(node, &inputs[6 + has_bias], &inputs[7 + has_bias]));
} else {
ICHECK(num_inputs >= 2U && num_inputs <= 3U)
<< "Fully connected (dense) layer requires 3 inputs with a bias, 2 inputs without.";
has_bias = num_inputs == 3;
for (const auto& i : inputs) {
layer->inputs.push_back(MakeACLTensorFromJSONEntry(i));
}
layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
}
auto function = std::make_shared<arm_compute::NEFullyConnectedLayer>(mm);
function->configure(&layer->inputs[0], &layer->inputs[1],
has_bias ? &layer->inputs[2] : nullptr, &layer->outputs[0], fc_info);
layer->function = function;
}
/*!
* \brief Create a pooling layer.
*
* \note Currently max_pool2d, avg_pool2d and L2 pooling are supported.
*
* \param layer The ACL layer to build, containing inputs, outputs and the ACL function.
* \param node The JSON representation of the operator.
*/
void CreatePoolingLayer(CachedLayer* layer, const JSONGraphNode& node) {
std::vector<std::string> padding = node.GetAttr<std::vector<std::string>>("padding");
std::vector<std::string> strides = node.GetAttr<std::vector<std::string>>("strides");
std::vector<std::string> dilation = node.GetAttr<std::vector<std::string>>("dilation");
bool ceil_mode = std::stoi(node.GetAttr<std::vector<std::string>>("ceil_mode")[0]);
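// ceil_mode selects ceil rather than floor rounding of the pooled output
// extent; the flag is applied inside MakeACLPadStride.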
arm_compute::PadStrideInfo pad_stride_info = MakeACLPadStride(padding, strides, ceil_mode);
auto attr_pool_size = node.GetAttr<std::vector<std::string>>("pool_size");
int pool_size_h = std::stoi(attr_pool_size[0]);
int pool_size_w = std::stoi(attr_pool_size[1]);
// Only applies to average pool and l2 pool.
// ACL's exclude pad option is the inverse of Relay's include pad option.
bool exclude_pad = false;
if (node.HasAttr("count_include_pad")) {
int count_include_pad =
std::stoi(node.GetAttr<std::vector<std::string>>("count_include_pad")[0]);
exclude_pad = !count_include_pad;
}
arm_compute::PoolingType pool_type;
if (node.GetOpName() == "nn.max_pool2d") {
pool_type = arm_compute::PoolingType::MAX;
} else if (node.GetOpName() == "nn.avg_pool2d") {
pool_type = arm_compute::PoolingType::AVG;
} else if (node.GetOpName() == "nn.l2_pool2d") {
pool_type = arm_compute::PoolingType::L2;
} else {
LOG(FATAL) << "Pooling type not supported";
}
ICHECK(dilation.size() == 2 && dilation[0] == "1" && dilation[1] == "1")
<< "Dilation other than (1, 1) not supported";
arm_compute::PoolingLayerInfo pool_info =
arm_compute::PoolingLayerInfo(pool_type, arm_compute::Size2D(pool_size_h, pool_size_w),
arm_compute::DataLayout::NHWC, pad_stride_info, exclude_pad);
layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[0]));
layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
auto function = std::make_shared<arm_compute::NEPoolingLayer>();
function->configure(&layer->inputs[0], &layer->outputs[0], pool_info);
layer->function = function;
}
/*!
* \brief Create a global pooling layer.
*
* \note Currently global_max_pool2d and global_avg_pool2d are supported.
*
* \param layer The ACL layer to build, containing inputs, outputs and the ACL function.
* \param node The JSON representation of the operator.
*/
void CreateGlobalPoolingLayer(CachedLayer* layer, const JSONGraphNode& node) {
arm_compute::PoolingType pool_type;
if (node.GetOpName() == "nn.global_max_pool2d") {
pool_type = arm_compute::PoolingType::MAX;
} else if (node.GetOpName() == "nn.global_avg_pool2d") {
pool_type = arm_compute::PoolingType::AVG;
} else {
LOG(FATAL) << "Pooling type not supported";
}
arm_compute::PoolingLayerInfo pool_info =
arm_compute::PoolingLayerInfo(pool_type, arm_compute::DataLayout::NHWC);
layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[0]));
layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
auto function = std::make_shared<arm_compute::NEPoolingLayer>();
function->configure(&layer->inputs[0], &layer->outputs[0], pool_info);
layer->function = function;
}
/*!
* \brief Create a reshape layer.
*
* \param layer The ACL layer to build, containing inputs, outputs and the ACL function.
* \param node The JSON representation of the operator.
*/
void CreateReshapeLayer(CachedLayer* layer, const JSONGraphNode& node) {
layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[0]));
layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
auto function = std::make_shared<arm_compute::NEReshapeLayer>();
function->configure(&layer->inputs[0], &layer->outputs[0]);
layer->function = function;
}
/*!
* \brief Create a maximum layer.
*
* \param layer The ACL layer to build, containing inputs, outputs and the ACL function.
* \param node The JSON representation of the operator.
*/
void CreateMaximumLayer(CachedLayer* layer, const JSONGraphNode& node) {
layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[0]));
layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[1]));
layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
auto function = std::make_shared<arm_compute::NEElementwiseMax>();
function->configure(&layer->inputs[0], &layer->inputs[1], &layer->outputs[0]);
layer->function = function;
}
/*!
* \brief Create an add/qnn.add layer.
*
* \param layer The ACL layer to build, containing inputs, outputs and the ACL function.
* \param node The JSON representation of the operator.
*/
void CreateAddLayer(CachedLayer* layer, const JSONGraphNode& node) {
auto op_name = node.GetOpName();
if ("add" == op_name) {
layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[0]));
layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[1]));
layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
} else if ("qnn.add" == op_name) {
layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[0], &node.GetInputs()[2],
&node.GetInputs()[3]));
layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[1], &node.GetInputs()[4],
&node.GetInputs()[5]));
layer->outputs.push_back(
MakeACLTensorFromJSONNode(node, &node.GetInputs()[6], &node.GetInputs()[7]));
} else {
LOG(FATAL) << "Unsupported form of add op: " + op_name;
}
auto f = std::make_shared<arm_compute::NEArithmeticAddition>();
// SATURATE is used as add_QASYMM8_QASYMM8_QASYMM8 always saturates the result.
f->configure(&layer->inputs[0], &layer->inputs[1], &layer->outputs[0],
arm_compute::ConvertPolicy::SATURATE);
layer->function = f;
}
/*!
* \brief Create a concatenate layer.
*
* \param layer The ACL layer to build, containing inputs, outputs and the ACL function.
* \param node The JSON representation of the operator.
*/
void CreateConcatenateLayer(CachedLayer* layer, const JSONGraphNode& node) {
std::vector<std::string> axis = node.GetAttr<std::vector<std::string>>("axis");
std::vector<const arm_compute::ITensor*> inputs;
for (auto input : node.GetInputs()) {
layer->inputs.push_back(MakeACLTensorFromJSONEntry(input, nullptr, nullptr, false));
layer->json_inputid_to_layer_inputid[std::pair<uint32_t, uint32_t>(input.id_, input.index_)] =
layer->inputs.size() - 1;
}
for (size_t i = 0; i < layer->inputs.size(); i++) {
inputs.push_back(&layer->inputs[i]);
}
layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
int dimNum = layer->inputs[0].info()->num_dimensions();
auto function = std::make_shared<arm_compute::NEConcatenateLayer>();
// The shape of an input tensor is reversed when passed to ACL, e.g. a tensor
// with shape [1, 2, 3, 4] becomes [4, 3, 2, 1] on the ACL side, so the axis
// must be remapped accordingly.
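// E.g. for a rank-4 input, TVM axis 1 maps to ACL axis 4 - 1 - 1 = 2, while
// the negative TVM axis -1 (innermost dimension) maps to ACL axis 0.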
auto a = std::stoi(axis[0]);
function->configure(inputs, &layer->outputs[0], a < 0 ? -a - 1 : dimNum - a - 1);
layer->function = function;
}
/*! \brief Allow ACL functions to request auxiliary memory from TVM. */
ACLAllocator allocator_;
/*!
* \brief The network layers represented by ACL functions.
* \note Currently only supports a single layer.
*/
CachedLayer layer_;
#else
void Run() override {
LOG(FATAL) << "Cannot call run on Arm Compute Library module without runtime enabled. "
<< "Please build with USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR.";
}
void BuildEngine() {
LOG(WARNING) << "Arm Compute Library engine is not initialized. "
<< "Please build with USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR.";
}
#endif
};
ffi::Module ACLRuntimeCreate(const ffi::String& symbol_name, const ffi::String& graph_json,
const ffi::Array<ffi::String>& const_names) {
auto n = ffi::make_object<ACLRuntime>(symbol_name, graph_json, const_names);
return ffi::Module(n);
}
TVM_FFI_STATIC_INIT_BLOCK() {
namespace refl = tvm::ffi::reflection;
refl::GlobalDef()
.def("runtime.arm_compute_lib_runtime_create", ACLRuntimeCreate)
.def("ffi.Module.load_from_bytes.arm_compute_lib",
JSONRuntimeBase::LoadFromBytes<ACLRuntime>);
}
} // namespace contrib
} // namespace runtime
} // namespace tvm