| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*! |
| * \file src/runtime/contrib/tensorrt/tensorrt_runtime.cc |
| * \brief JSON runtime implementation for TensorRT. |
| */ |
| |
| #include <tvm/ffi/extra/json.h> |
| #include <tvm/ffi/function.h> |
| #include <tvm/ffi/reflection/registry.h> |
| #include <tvm/runtime/tensor.h> |
| |
| #include <fstream> |
| #include <memory> |
| #include <string> |
| #include <unordered_map> |
| #include <vector> |
| |
| #include "../../../support/env.h" |
| #include "../../file_utils.h" |
| #include "../json/json_node.h" |
| #include "../json/json_runtime.h" |
| |
| #ifdef TVM_GRAPH_EXECUTOR_TENSORRT |
| #include "NvInfer.h" |
| #include "tensorrt_builder.h" |
| #include "tensorrt_calibrator.h" |
| #include "tensorrt_utils.h" |
| #endif |
| |
| namespace tvm { |
| namespace runtime { |
| namespace contrib { |
| |
| struct PairHash { |
| template <class T1, class T2> |
| std::size_t operator()(const std::pair<T1, T2>& pair) const { |
| return std::hash<T1>()(pair.first) ^ std::hash<T2>()(pair.second); |
| } |
| }; |
| |
| using namespace tvm::runtime::json; |
| |
| class TensorRTRuntime : public JSONRuntimeBase { |
| public: |
| /*! |
| * \brief The TensorRT runtime module. Deserialize the provided functions |
| * on creation and store in the layer cache. |
| * |
| * \param symbol_name The name of the function. |
| * \param graph_json serialized JSON representation of a sub-graph. |
| * \param const_names The names of each constant in the sub-graph. |
| */ |
| explicit TensorRTRuntime(const std::string& symbol_name, const std::string& graph_json, |
| const ffi::Array<ffi::String>& const_names) |
| : JSONRuntimeBase(symbol_name, graph_json, const_names), |
| use_implicit_batch_(true), |
| max_workspace_size_(size_t(1) << 30), |
| max_batch_size_(-1), |
| multi_engine_mode_(false), |
| use_fp16_(false) { |
| const bool use_int8 = support::GetEnv("TVM_TENSORRT_USE_INT8", false); |
| multi_engine_mode_ = support::GetEnv("TVM_TENSORRT_MULTI_ENGINE", false); |
| num_calibration_batches_remaining_ = support::GetEnv("TENSORRT_NUM_CALI_INT8", 0); |
| if (use_int8) { |
| ICHECK(num_calibration_batches_remaining_ != 0) |
| << "When using INT8 mode, " |
| << "environment variable TENSORRT_NUM_CALI_INT8" |
| << "must also be set to specify the number of " |
| << "calibration times"; |
| LOG(INFO) << "settiing up " << num_calibration_batches_remaining_ |
| << " sample data to calibrate data ... "; |
| ICHECK(multi_engine_mode_ == false) << "When using int8 mode, " |
| << "multi-engine is not allowed"; |
| } |
| } |
| |
| /*! |
| * \brief The type key of the module. |
| * |
| * \return module type key. |
| */ |
| const char* kind() const final { return "tensorrt"; } |
| |
| /*! \brief Get the property of the runtime module .*/ |
| int GetPropertyMask() const final { |
| return ffi::Module::kBinarySerializable | ffi::Module::kRunnable; |
| } |
| |
| /*! |
| * \brief Initialize runtime. Create TensorRT layer from JSON |
| * representation. |
| * |
| * \param consts The constant params from compiled model. |
| */ |
| void Init(const ffi::Array<Tensor>& consts) override { |
| ICHECK_EQ(consts.size(), const_idx_.size()) |
| << "The number of input constants must match the number of required."; |
| LoadGlobalAttributes(); |
| SetupConstants(consts); |
| GetCachedEnginesFromDisk(); |
| } |
| |
| void LoadGlobalAttributes() { |
| // These settings are global to the entire subgraph. Codegen will add them as attributes to all |
| // op nodes. Read from first one. |
| for (size_t i = 0; i < nodes_.size(); ++i) { |
| if (nodes_[i].HasAttr("use_implicit_batch") && nodes_[i].HasAttr("max_workspace_size")) { |
| use_implicit_batch_ = static_cast<int>(nodes_[i].GetAttr<int64_t>("use_implicit_batch")); |
| // Allow max_workspace_size to be overridden at runtime. |
| size_t runtime_max_workspace_size = |
| support::GetEnv("TVM_TENSORRT_MAX_WORKSPACE_SIZE", size_t(0)); |
| if (runtime_max_workspace_size != 0) { |
| max_workspace_size_ = runtime_max_workspace_size; |
| } else { |
| max_workspace_size_ = |
| static_cast<size_t>(nodes_[i].GetAttr<int64_t>("max_workspace_size")); |
| } |
| } |
| if (nodes_[i].HasAttr("use_fp16")) { |
| use_fp16_ = static_cast<int>(nodes_[i].GetAttr<int64_t>("use_fp16")); |
| } |
| } |
| } |
| |
| #ifdef TVM_GRAPH_EXECUTOR_TENSORRT |
| /*! \brief Destroy engines and contexts. */ |
| void DestroyEngines() { |
| for (auto& it : trt_engine_cache_) { |
| VLOG(1) << "Destroying TensorRT context for function '" << it.first.first << "' (batch size " |
| << it.first.second << ")"; |
| it.second.context->destroy(); |
| VLOG(1) << "Destroying TensorRT engine for function '" << it.first.first << "' (batch size " |
| << it.first.second << ")"; |
| it.second.engine->destroy(); |
| } |
| trt_engine_cache_.clear(); |
| } |
| |
| ~TensorRTRuntime() override { |
| VLOG(1) << "Destroying TensorRT runtime"; |
| DestroyEngines(); |
| VLOG(1) << "Destroyed TensorRT runtime"; |
| } |
| |
| /*! \brief Run inference using built engine. */ |
| void Run() override { |
| auto& engine_and_context = GetOrBuildEngine(); |
| int batch_size = GetBatchSize(); |
| if (batch_size == 0) return; |
| auto engine = engine_and_context.engine; |
| auto context = engine_and_context.context; |
| const int num_bindings = engine->getNbBindings(); |
| std::vector<void*> bindings(num_bindings, nullptr); |
| std::vector<size_t> binding_sizes(num_bindings, 0); |
| // Setup input bindings. |
| for (size_t i = 0; i < input_nodes_.size(); ++i) { |
| auto nid = input_nodes_[i]; |
| if (nodes_[nid].GetOpType() == "input") { |
| for (size_t j = 0; j < nodes_[nid].GetOpShape().size(); ++j) { |
| uint32_t eid = EntryID(nid, j); |
| const std::string name = nodes_[nid].GetOpName() + "_" + std::to_string(j); |
| int binding_index = engine->getBindingIndex(name.c_str()); |
| ICHECK_NE(binding_index, -1); |
| #if TRT_VERSION_GE(6, 0, 1) |
| if (!use_implicit_batch_) { |
| std::vector<int64_t> shape(data_entry_[eid]->shape, |
| data_entry_[eid]->shape + data_entry_[eid]->ndim); |
| auto dims = VectorToTrtDims(shape); |
| ICHECK(context->setBindingDimensions(binding_index, dims)); |
| } |
| #endif |
| if (data_entry_[eid]->device.device_type == kDLCUDA) { |
| bindings[binding_index] = data_entry_[eid]->data; |
| } else { |
| auto device_buffer = GetOrAllocateDeviceBuffer(eid, binding_index); |
| device_buffer.CopyFrom(data_entry_[eid]); |
| bindings[binding_index] = device_buffer->data; |
| } |
| |
| auto dims = engine->getBindingDimensions(binding_index); |
| int num_elements = 1; |
| for (int i = 0; i < dims.nbDims; ++i) num_elements *= dims.d[i]; |
| binding_sizes[binding_index] = num_elements; |
| } |
| } |
| } |
| |
| // add batch data to calibrator |
| if (num_calibration_batches_remaining_ > 0) { |
| if (calibrator_ != nullptr) { |
| LOG(INFO) << "Starting adding last " << num_calibration_batches_remaining_ |
| << "-th batch data to the calibrator"; |
| calibrator_->AddBatchData(bindings, binding_sizes); |
| num_calibration_batches_remaining_--; |
| } |
| return; |
| } |
| |
| // Setup output bindings. |
| for (size_t i = 0; i < outputs_.size(); ++i) { |
| uint32_t eid = EntryID(outputs_[i]); |
| const std::string& name = engine_and_context.outputs[i]; |
| int binding_index = engine->getBindingIndex(name.c_str()); |
| ICHECK_NE(binding_index, -1); |
| if (data_entry_[eid]->device.device_type == kDLCUDA) { |
| bindings[binding_index] = data_entry_[eid]->data; |
| } else { |
| auto device_buffer = GetOrAllocateDeviceBuffer(eid, binding_index); |
| bindings[binding_index] = device_buffer->data; |
| } |
| } |
| |
| #if TRT_VERSION_GE(6, 0, 1) |
| if (use_implicit_batch_) { |
| ICHECK(context->execute(batch_size, bindings.data())) << "Running TensorRT failed."; |
| } else { |
| ICHECK(context->executeV2(bindings.data())) << "Running TensorRT failed."; |
| } |
| #else |
| ICHECK(context->execute(batch_size, bindings.data())) << "Running TensorRT failed."; |
| #endif |
| |
| // Copy outputs from GPU buffers if needed. |
| for (size_t i = 0; i < outputs_.size(); ++i) { |
| uint32_t eid = EntryID(outputs_[i]); |
| const std::string& name = engine_and_context.outputs[i]; |
| int binding_index = engine->getBindingIndex(name.c_str()); |
| ICHECK_NE(binding_index, -1); |
| if (data_entry_[eid]->device.device_type != kDLCUDA) { |
| auto device_buffer = GetOrAllocateDeviceBuffer(eid, binding_index); |
| device_buffer.CopyTo(const_cast<DLTensor*>(data_entry_[eid])); |
| } |
| } |
| } |
| |
| private: |
| /*! \brief Get batch size for engine from the runtime input shapes. */ |
| int GetBatchSize() { |
| return data_entry_[input_var_eid_[0]]->ndim == 0 ? 1 : data_entry_[input_var_eid_[0]]->shape[0]; |
| } |
| |
| /*! \brief Find an engine in the cache which we can reuse depending on the mode. If no compatible |
| * engine exists, return false to indicate that a new one should be built. */ |
| bool FindCompatibleEngine(int batch_size, int* compatible_engine_batch_size) { |
| if (multi_engine_mode_) { |
| // Exact match is required for multi engine mode. |
| if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size))) { |
| *compatible_engine_batch_size = batch_size; |
| return true; |
| } |
| return false; |
| } |
| // Check for engine with compatible max_batch_size. |
| if (batch_size <= max_batch_size_) { |
| *compatible_engine_batch_size = max_batch_size_; |
| return true; |
| } |
| return false; |
| } |
| |
| /*! |
| * \brief Build TensorRT engine from JSON representation and cache it. If compatible engine is |
| * already built, do nothing. |
| */ |
| TensorRTEngineAndContext& GetOrBuildEngine() { |
| int batch_size = GetBatchSize(); |
| int compatible_engine_batch_size = -1; |
| bool find_engine_flag = FindCompatibleEngine(batch_size, &compatible_engine_batch_size); |
| const bool use_int8 = (support::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0); |
| const bool int8_calibration_not_used_or_not_complete = |
| (calibrator_ != nullptr && num_calibration_batches_remaining_ != 0); |
| if (find_engine_flag && |
| (!use_int8 || calibrator_ == nullptr || int8_calibration_not_used_or_not_complete)) { |
| // A compatible engine already exists. |
| return trt_engine_cache_.at(std::make_pair(symbol_name_, compatible_engine_batch_size)); |
| } |
| |
| // For single engine mode, remove previous engine and update max_batch_size. |
| if (!multi_engine_mode_) { |
| DestroyEngines(); |
| max_batch_size_ = batch_size; |
| } |
| DLOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_ |
| << " with batch size " << batch_size; |
| |
| // Build engine. |
| if (calibrator_ != nullptr && num_calibration_batches_remaining_ == 0) { |
| // Calibration complete and build int8 engine |
| BuildEngineFromJson(batch_size); |
| calibrator_.reset(nullptr); |
| } else { |
| // Build new engine |
| BuildEngineFromJson(batch_size); |
| TensorRTEngineAndContext& engine_and_context = |
| trt_engine_cache_[std::make_pair(symbol_name_, batch_size)]; |
| if (use_int8) { |
| this->CreateInt8Calibrator(engine_and_context); |
| } |
| } |
| |
| VLOG(1) << "Finished building TensorRT engine for subgraph " << symbol_name_ |
| << " with batch size " << batch_size; |
| CacheEngineToDisk(); |
| return trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size)); |
| } |
| |
| void BuildEngineFromJson(int batch_size) { |
| const bool use_fp16 = support::GetEnv("TVM_TENSORRT_USE_FP16", false) || use_fp16_; |
| TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_, |
| use_fp16, batch_size, calibrator_.get()); |
| for (size_t i = 0; i < input_nodes_.size(); ++i) { |
| auto nid = input_nodes_[i]; |
| const auto& node = nodes_[nid]; |
| std::string name = node.GetOpName(); |
| if (node.GetOpType() == "input") { |
| builder.AddInput(nid, EntryID(nid, 0), node); |
| } else { |
| ICHECK_EQ(node.GetOpType(), "const"); |
| uint32_t eid = EntryID(nid, 0); |
| builder.AddConstant(nid, data_entry_[eid]); |
| } |
| } |
| |
| // Add layers. |
| for (size_t nid = 0; nid < nodes_.size(); ++nid) { |
| const auto& node = nodes_[nid]; |
| if (node.GetOpType() != "kernel") continue; |
| builder.AddLayer(nid, node); |
| } |
| |
| // Add outputs. |
| for (size_t i = 0; i < outputs_.size(); ++i) { |
| builder.AddOutput(outputs_[i], EntryID(outputs_[i])); |
| } |
| |
| TensorRTEngineAndContext engine_and_context = builder.BuildEngine(); |
| trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context; |
| } |
| |
| /*! \brief If TVM_TENSORRT_CACHE_DIR is set, will check that directory for |
| * already built TRT engines and load into trt_engine_cache_ so they don't |
| * have to be built at first inference. |
| */ |
| bool GetCachedEnginesFromDisk() { |
| std::string cache_dir = support::GetEnv("TVM_TENSORRT_CACHE_DIR", std::string("")); |
| if (cache_dir.empty()) return false; |
| std::string key = GetSubgraphKey(); |
| std::string path = cache_dir + "/" + key + ".plan"; |
| // Check if engine is in the cache. |
| std::ifstream infile(path, std::ios::binary); |
| if (!infile.good()) return false; |
| LOG(INFO) << "Loading cached TensorRT engine from " << path; |
| infile.close(); |
| std::string serialized_engine; |
| LoadBinaryFromFile(path, &serialized_engine); |
| // Deserialize engine |
| nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger_); |
| TensorRTEngineAndContext engine_and_context; |
| engine_and_context.engine = |
| runtime->deserializeCudaEngine(&serialized_engine[0], serialized_engine.size(), nullptr); |
| engine_and_context.context = engine_and_context.engine->createExecutionContext(); |
| // Load metadata |
| namespace json = ::tvm::ffi::json; |
| std::string meta_path = cache_dir + "/" + key + ".meta"; |
| std::string serialized_meta; |
| LoadBinaryFromFile(meta_path, &serialized_meta); |
| auto meta_obj = json::Parse(serialized_meta).cast<json::Object>(); |
| int batch_size; |
| // Read inputs |
| { |
| auto arr = meta_obj.at(ffi::String("inputs")).cast<json::Array>(); |
| engine_and_context.inputs.clear(); |
| for (const auto& v : arr) { |
| engine_and_context.inputs.push_back(std::string(v.cast<ffi::String>())); |
| } |
| } |
| // Read outputs |
| { |
| auto arr = meta_obj.at(ffi::String("outputs")).cast<json::Array>(); |
| engine_and_context.outputs.clear(); |
| for (const auto& v : arr) { |
| engine_and_context.outputs.push_back(std::string(v.cast<ffi::String>())); |
| } |
| } |
| // Read batch_size |
| batch_size = static_cast<int>(meta_obj.at(ffi::String("batch_size")).cast<int64_t>()); |
| trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context; |
| max_batch_size_ = batch_size; |
| LOG(INFO) << "finished loading engine and context ... "; |
| return true; |
| } |
| |
| /*! \brief If TVM_TENSORRT_CACHE_DIR is set, will save the engine to that |
| * directory so it can be loaded later. |
| */ |
| void CacheEngineToDisk() { |
| int batch_size = GetBatchSize(); |
| std::string cache_dir = support::GetEnv("TVM_TENSORRT_CACHE_DIR", std::string("")); |
| if (cache_dir.empty()) return; |
| std::string key = GetSubgraphKey(); |
| std::string path = cache_dir + "/" + key + ".plan"; |
| DLOG(INFO) << "Caching TensorRT engine to " << path; |
| // Serialize engine to disk |
| nvinfer1::IHostMemory* serialized_engine = |
| trt_engine_cache_[std::make_pair(symbol_name_, batch_size)].engine->serialize(); |
| SaveBinaryToFile(path, std::string(static_cast<const char*>(serialized_engine->data()), |
| serialized_engine->size())); |
| serialized_engine->destroy(); |
| // Serialize metadata |
| namespace json = ::tvm::ffi::json; |
| json::Object meta_obj; |
| { |
| json::Array inputs_arr; |
| for (const auto& s : trt_engine_cache_[std::make_pair(symbol_name_, batch_size)].inputs) { |
| inputs_arr.push_back(ffi::String(s)); |
| } |
| meta_obj.Set(ffi::String("inputs"), std::move(inputs_arr)); |
| } |
| { |
| json::Array outputs_arr; |
| for (const auto& s : trt_engine_cache_[std::make_pair(symbol_name_, batch_size)].outputs) { |
| outputs_arr.push_back(ffi::String(s)); |
| } |
| meta_obj.Set(ffi::String("outputs"), std::move(outputs_arr)); |
| } |
| meta_obj.Set(ffi::String("batch_size"), static_cast<int64_t>(batch_size)); |
| std::string meta_path = cache_dir + "/" + key + ".meta"; |
| SaveBinaryToFile(meta_path, std::string(json::Stringify(meta_obj))); |
| } |
| |
| std::string GetSubgraphKey() { |
| // Using this key will only allow a single model per TVM_TENSORRT_CACHE_DIR directory. We could |
| // instead use a hash of graph_json and all weights to allow many models in the same directory, |
| // but the cost of computing the hash is high. |
| return symbol_name_ + (support::GetEnv("TVM_TENSORRT_USE_FP16", false) ? "_fp16" : "_fp32"); |
| } |
| |
| /*! \brief Retreive a GPU buffer for input or output or allocate if needed. */ |
| Tensor GetOrAllocateDeviceBuffer(int entry_id, int binding_index) { |
| std::vector<int64_t> shape(data_entry_[entry_id]->shape, |
| data_entry_[entry_id]->shape + data_entry_[entry_id]->ndim); |
| if (device_buffers_.count(binding_index)) { |
| // Buffer is already initialized. |
| if (shape[0] > device_buffers_[binding_index]->shape[0]) { |
| // Buffer is too small. Need to allocate bigger buffer. |
| device_buffers_[binding_index] = |
| runtime::Tensor::Empty(shape, data_entry_[entry_id]->dtype, {kDLCUDA, 0}); |
| } else if (shape[0] < device_buffers_[binding_index]->shape[0]) { |
| // Buffer is too large. Create view. |
| return device_buffers_[binding_index].CreateView(shape, data_entry_[entry_id]->dtype); |
| } |
| } else { |
| // Buffer not initialized yet. |
| device_buffers_[binding_index] = |
| runtime::Tensor::Empty(shape, data_entry_[entry_id]->dtype, {kDLCUDA, 0}); |
| } |
| return device_buffers_.at(binding_index); |
| } |
| |
| void CreateInt8Calibrator(const TensorRTEngineAndContext& engine_and_context) { |
| // Get input names in binding order. |
| std::vector<std::string> input_names; |
| for (size_t i = 0; i < engine_and_context.inputs.size(); i++) { |
| std::string ele = engine_and_context.inputs[i]; |
| input_names.push_back(ele); |
| } |
| const int batch_size = GetBatchSize(); |
| calibrator_.reset(new TensorRTCalibrator(batch_size, input_names)); |
| } |
| |
| /*! \brief Map of function name and max batch size to TRT engine if built already. */ |
| std::unordered_map<std::pair<std::string, int>, TensorRTEngineAndContext, PairHash> |
| trt_engine_cache_; |
| |
| /*! \brief Calibrator for INT8 mode. */ |
| std::unique_ptr<TensorRTCalibrator> calibrator_; |
| |
| /*! \brief Map of inding index to GPU buffers for inputs and outputs. Only used when target device |
| * is not "cuda". Since TensorRT execution can only read data from GPU, we need to copy data from |
| * the runtime device to these buffers first. These will be allocated for the highest batch size |
| * used by all engines. */ |
| std::unordered_map<int, Tensor> device_buffers_; |
| |
| /*! \brief TensorRT logger. */ |
| TensorRTLogger logger_; |
| |
| #else // TVM_GRAPH_EXECUTOR_TENSORRT |
| void Run() override { |
| LOG(FATAL) << "TensorRT runtime is not enabled. " |
| << "Please build with USE_TENSORRT_RUNTIME."; |
| } |
| |
| void BuildEngine() { |
| LOG(WARNING) << "TensorRT runtime is not enabled. " |
| << "Please build with USE_TENSORRT_RUNTIME."; |
| } |
| |
| bool GetCachedEnginesFromDisk() { return false; } |
| |
| void CacheEngineToDisk() {} |
| #endif // TVM_GRAPH_EXECUTOR_TENSORRT |
| |
| bool use_implicit_batch_; |
| |
| size_t max_workspace_size_; |
| |
| /*! \brief Number of calibration batches until we are done. */ |
| int num_calibration_batches_remaining_; |
| |
| /*! \brief Highest batch size that an engine has been built for, used in single-engine mode only |
| * (multi_engine_mode=false). */ |
| int max_batch_size_; |
| |
| /*! \brief The strategy to use for dynamic batching. With multi_engine_mode=true, a new TensorRT |
| * engine is created for each unique batch size encountered. With multi_engine_mode=false, only |
| * one TensorRT engine is alive at any given time. It is replaced if a higher batch size is |
| * encountered. Multi-engine mode should give better performance, at a cost of higher memory usage |
| * and more time spent building engines. */ |
| bool multi_engine_mode_; |
| |
| /*! \brief Use auto-conversion to fp16 */ |
| bool use_fp16_; |
| }; |
| |
| ffi::Module TensorRTRuntimeCreate(const ffi::String& symbol_name, const ffi::String& graph_json, |
| const ffi::Array<ffi::String>& const_names) { |
| auto n = ffi::make_object<TensorRTRuntime>(symbol_name, graph_json, const_names); |
| return ffi::Module(n); |
| } |
| |
| TVM_FFI_STATIC_INIT_BLOCK() { |
| namespace refl = tvm::ffi::reflection; |
| refl::GlobalDef() |
| .def("runtime.tensorrt_runtime_create", TensorRTRuntimeCreate) |
| .def("ffi.Module.load_from_bytes.tensorrt", JSONRuntimeBase::LoadFromBytes<TensorRTRuntime>); |
| } |
| |
| } // namespace contrib |
| } // namespace runtime |
| } // namespace tvm |