blob: 26ea40075458411ee07b964891bfd480827c8772 [file]
/* * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file runtime/contrib/tensorrt/tensorrt_ops.h
* \brief Converters from ops into TensorRT layers. Converters should
* inherit from TensorRTOpConverter and implement the Convert() method.
*/
#ifndef TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_OPS_H_
#define TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_OPS_H_
#include <cmath>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "../json/json_node.h"
#include "NvInfer.h"
#include "tensorrt_utils.h"
#if TRT_VERSION_GE(6, 0, 1)
#define TRT_HAS_IMPLICIT_BATCH(params) (params->network->hasImplicitBatchDimension())
#else
#define TRT_HAS_IMPLICIT_BATCH(params) (true)
#endif
namespace tvm {
namespace runtime {
namespace contrib {
using JSONGraphNode = tvm::runtime::json::JSONGraphNode;
/*!
* \brief An input to a op may be either kTensor in the case of nvinfer::ITensor*,
* a kWeight for nvinfer1::Weights, or ignored (eg for the nn.pad value).
*/
enum TensorRTInputType { kTensor, kWeight, kIgnored };
/*!
* \brief An input to a TensorRTOpConverter. The type of the input is either kTensor
* or kWeight. For kTensor, "tensor" contains the input tensor. For kWeight,
* "weight" contains the input weight and "weight_shape" contains the shape.
*/
struct TensorRTOpInput {
/*! \brief If type is kTensor, will store input tensor. */
nvinfer1::ITensor* tensor;
/*! \brief If type is kWeight, will store input weight. */
nvinfer1::Weights weight;
/*! \brief Whether the input is in tensor or weight. */
TensorRTInputType type;
/*! \brief If type is kWeight, will store weight shape. */
std::vector<int> weight_shape;
explicit TensorRTOpInput(nvinfer1::ITensor* tensor)
: tensor(tensor), weight({tensor->getType(), nullptr, 0}), type(kTensor) {}
TensorRTOpInput(nvinfer1::Weights weight, const std::vector<int>& shape)
: tensor(nullptr), weight(weight), type(kWeight), weight_shape(shape) {}
};
/*! \brief Parameters to convert an Op to TensorRT. */
struct TensorRTOpConverterParams {
/*! \brief The TRT network that the new layer should be added to. */
nvinfer1::INetworkDefinition* network;
/*! \brief Index of JSON node. */
int nid;
/*! \brief The corresponding JSON node. */
const JSONGraphNode& node;
/*! \brief The type of op. */
std::string op_name;
/*! \brief Inputs to the op. */
std::vector<TensorRTOpInput> inputs;
/*! \brief Outputs of the op should be populated here during Convert(). */
std::vector<nvinfer1::ITensor*> outputs;
/*! \brief Any newly allocated weights should be stored here also. */
std::vector<nvinfer1::Weights>* trt_weights;
TensorRTOpConverterParams(nvinfer1::INetworkDefinition* network, int nid,
const JSONGraphNode& node, std::vector<nvinfer1::Weights>* trt_weights)
: network(network), nid(nid), node(node), trt_weights(trt_weights) {
op_name = node.GetOpName();
}
std::string LayerName() const { return op_name + "(" + std::to_string(nid) + ")"; }
};
/*! \brief Base class for an op converter from Operator to TRT. */
class TensorRTOpConverter {
public:
virtual ~TensorRTOpConverter() = default;
/*! \brief Operator name. */
std::string op_name;
/*! \brief Used to specify whether each input is tensor or weight. */
const std::vector<TensorRTInputType> input_types;
/*! \brief If set to true, any number of tensor inputs can be used for the op. */
const bool variable_input_count;
/*!
* \brief Converter subclasses should call this constructor to set
* input_types or variable_input_count.
* \param input_types For each input to the op, there should be a
* corresponding entry in input_types to determine whether that input should
* be a tensor or a weight. TensorRTBuilder will prepare inputs in
* TensorRTOpConverter according to this.
* \param variable_input_count If the op can have multiple inputs, set this to
* true. input_types vector will be ignored and any number of input tensors
* can be used for this op. All inputs will be tensors and not weights.
*/
TensorRTOpConverter(std::string op_name, const std::vector<TensorRTInputType>& input_types,
bool variable_input_count = false);
/*!
* \brief Convert to TRT. Implementation should use inputs and attributes
* from the CallNode to add the corresponding TRT layers to network. Outputs
* should be pushed to outputs vector.
* \param params Parameters for this op.
*/
virtual void Convert(TensorRTOpConverterParams* params) const = 0;
/*!
* \brief Helper function to reshape a tensor.
* \param params Parameters for this op.
* \param input Tensor to reshape.
* \param new_shape New shape, does not include batch dim.
* \return Reshaped tensor
*/
nvinfer1::ITensor* Reshape(TensorRTOpConverterParams* params, nvinfer1::ITensor* input,
const std::vector<int>& new_shape) const;
/*!
* \brief Helper function to transpose a tensor.
* \param params Parameters for this op.
* \param input Tensor to transpose.
* \param order New order of axes, does include batch dim.
* \return Transposed tensor
*/
nvinfer1::ITensor* Transpose(TensorRTOpConverterParams* params, nvinfer1::ITensor* input,
const std::vector<int>& order) const;
/*!
* \brief Helper function to convert an axis to TRT format.
* \param axis Axis from TVM.
* \param input_rank Rank of input, does not include batch dim.
* \return Axis in TRT format.
*/
int ConvertAxis(TensorRTOpConverterParams* params, int axis, int input_rank) const;
/*!
* \brief Create constant that is broadcastable.
* \param params Parameters for this op.
* \param value Value of scalar.
* \param broadcast_to_dims Dims that scalar should be broadcastable against.
* \return Constant tensor.
*/
nvinfer1::ITensor* CreateScalar(TensorRTOpConverterParams* params, float value,
const nvinfer1::Dims& broadcast_to_dims) const;
/*!
* \brief Get pre/post padding values from padding attributes array.
* \param padding Serialized padding from op attributes.
* \param padding_is_asymmetric True if both pre and post are needed for asymmetric padding.
* \param prepadding Prepadding value or symmetric padding values if !padding_is_asymmetric.
* \param postpadding Postpadding value if padding_is_asymmetric.
*/
void GetPadding(const ffi::Array<int64_t>& padding, bool* use_asymmetric_padding,
nvinfer1::DimsHW* prepadding, nvinfer1::DimsHW* postpadding) const;
/*!
* \brief Get pre/post padding values from padding attributes array for volumetric ops.
* \param padding Serialized padding from op attributes.
* \param padding_is_asymmetric True if both pre and post are needed for asymmetric padding.
* \param prepadding Prepadding value or symmetric padding values if !padding_is_asymmetric.
* \param postpadding Postpadding value if padding_is_asymmetric.
*/
void GetPadding3D(const ffi::Array<int64_t>& padding, bool* use_asymmetric_padding,
nvinfer1::Dims* prepadding, nvinfer1::Dims* postpadding) const;
};
/*!
* \brief Get the map of available TensorRTOpConverters, where the key is the name of the op.
* \return Map of TensorRTOpConverters.
*/
const std::unordered_map<std::string, std::unique_ptr<TensorRTOpConverter>>& GetOpConverters();
} // namespace contrib
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_OPS_H_