src/target/target_kind.cc - tvm - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 /*!
  * \file src/target/target_kind.cc
  * \brief Target kind registry
  */
 #include <tvm/ir/expr.h>
 #include <tvm/runtime/device_api.h>
 #include <tvm/runtime/registry.h>
 #include <tvm/target/target.h>
 #include <tvm/target/target_kind.h>

 #include <algorithm>
 #include <limits>

 #include "../node/attr_registry.h"
 #include "./parsers/cpu.h"

 namespace tvm {

 // Register TargetKindNode as a node type.
 TVM_REGISTER_NODE_TYPE(TargetKindNode);

 // When a TargetKindNode is printed through ReprPrinter, only the kind's name is emitted.
 TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
     .set_dispatch<TargetKindNode>([](const ObjectRef& node, ReprPrinter* printer) {
       printer->stream << Downcast<TargetKind>(node)->name;
     });

 /**********  Registry-related code  **********/

 // Shorthand for the AttrRegistry instantiation over TargetKindRegEntry / TargetKind.
 // The functions below reach the shared instance through TargetKindRegistry::Global().
 using TargetKindRegistry = AttrRegistry<TargetKindRegEntry, TargetKind>;

 /*!
  * \brief List the names held by the global target kind registry.
  * \return The result of ListAllNames() on the global registry
  */
 Array<String> TargetKindRegEntry::ListTargetKinds() {
   auto registry = TargetKindRegistry::Global();
   return registry->ListAllNames();
 }

 /*!
  * \brief Build a map from each option name of a target kind to the type key of its value.
  * \param target_kind The target kind whose `key2vtype_` table is read
  * \return A map with one entry per element of `key2vtype_`
  */
 Map<String, String> TargetKindRegEntry::ListTargetKindOptions(const TargetKind& target_kind) {
   Map<String, String> name_to_type;
   for (const auto& option : target_kind->key2vtype_) {
     const auto& option_name = option.first;
     const auto& value_type = option.second;
     name_to_type.Set(option_name, value_type.type_key);
   }
   return name_to_type;
 }

 /*!
  * \brief Forward to RegisterOrGet() on the global target kind registry.
  * \param target_kind_name The name of the target kind
  * \return A reference to the registry entry for that name
  */
 TargetKindRegEntry& TargetKindRegEntry::RegisterOrGet(const String& target_kind_name) {
   auto registry = TargetKindRegistry::Global();
   return registry->RegisterOrGet(target_kind_name);
 }

 /*!
  * \brief Forward an attribute update for this entry's kind to the global registry.
  * \param key The attribute key
  * \param value The attribute value
  * \param plevel The priority level passed through to the registry
  */
 void TargetKindRegEntry::UpdateAttr(const String& key, TVMRetValue value, int plevel) {
   auto registry = TargetKindRegistry::Global();
   registry->UpdateAttr(key, kind_, value, plevel);
 }

 /*!
  * \brief Fetch the attribute map container registered under the given attribute name.
  * \param attr_name The attribute name
  * \return The container returned by GetAttrMap() on the global registry
  */
 const AttrRegistryMapContainerMap<TargetKind>& TargetKind::GetAttrMapContainer(
     const String& attr_name) {
   auto registry = TargetKindRegistry::Global();
   return registry->GetAttrMap(attr_name);
 }

 /*!
  * \brief Look up a target kind by name in the global registry.
  * \param target_kind_name The name of the target kind
  * \return The kind of the matching entry, or NullOpt when the registry has no such entry
  */
 Optional<TargetKind> TargetKind::Get(const String& target_kind_name) {
   const TargetKindRegEntry* entry = TargetKindRegistry::Global()->Get(target_kind_name);
   if (entry != nullptr) {
     return entry->kind_;
   }
   return NullOpt;
 }

 /**********  Utility functions  **********/

 /*!
  * \brief Extract a non-negative number from the string with the given prefix.
  * For example, when `str` is "sm_20" and `prefix` is "sm_", the result is 20.
  * Everything after `prefix` must be a non-empty run of ASCII decimal digits
  * whose value fits in an `int`.
  * \param str The string to be extracted
  * \param prefix The prefix to be checked
  * \return An integer, the extracted number. -1 if `str` does not start with
  *         `prefix`, has no digits after it, contains a non-digit character,
  *         or encodes a value too large for `int`
  */
 static int ExtractIntWithPrefix(const std::string& str, const std::string& prefix) {
   // Compare in place instead of materializing a substring copy.
   if (str.compare(0, prefix.size(), prefix) != 0) {
     return -1;
   }
   // A bare prefix such as "sm_" carries no number at all.
   if (str.size() == prefix.size()) {
     return -1;
   }
   constexpr int kMaxInt = std::numeric_limits<int>::max();
   int result = 0;
   for (size_t i = prefix.size(); i < str.size(); ++i) {
     const char c = str[i];
     // Explicit range check: passing a negative `char` to isdigit() is undefined behavior.
     if (c < '0' || c > '9') {
       return -1;
     }
     const int digit = c - '0';
     // Reject values that would overflow `int` (signed overflow is undefined behavior).
     if (result > (kMaxInt - digit) / 10) {
       return -1;
     }
     result = result * 10 + digit;
   }
   return result;
 }

 /*!
  * \brief Using TVM DeviceAPI to detect the device flag
  * \param device The device to be detected
  * \param flag The device flag to be detected
  * \param val The detected value. It also receives the result of the existence
  *            query, so its content should not be relied upon when false is returned
  * \return A boolean indicating if detection succeeds
  */
 static bool DetectDeviceFlag(Device device, runtime::DeviceAttrKind flag, TVMRetValue* val) {
   using runtime::DeviceAPI;
   DeviceAPI* api = DeviceAPI::Get(device, true);
   // Check if compiled with the corresponding device api
   if (api == nullptr) {
     return false;
   }
   // Check if the device exists
   api->GetAttr(device, runtime::kExist, val);
   const int exists = *val;
   if (!exists) {
     return false;
   }
   // Query the requested flag, reusing the API handle obtained above instead of
   // looking it up a second time.
   api->GetAttr(device, flag, val);
   return true;
 }

 /*!
  * \brief Set a string attribute if it is absent, otherwise require it to equal the given value.
  * \param attrs The attribute map to inspect and possibly update
  * \param name The attribute name
  * \param value The value to set, or the value an existing entry must match
  */
 void CheckOrSetAttr(Map<String, ObjectRef>* attrs, const String& name, const String& value) {
   auto found = attrs->find(name);
   if (found == attrs->end()) {
     attrs->Set(name, value);
     return;
   }
   // The attribute already exists: it must be a string identical to `value`.
   const ObjectRef existing = (*found).second;
   const auto* existing_str = existing.as<StringObj>();
   ICHECK(existing_str != nullptr && GetRef<String>(existing_str) == value)
       << "ValueError: Expects \"" << name << "\" to be \"" << value
       << "\", but gets: " << existing;
 }

 /**********  Target kind attribute updaters  **********/

 /*!
  * \brief Validate or fill in the "arch" attribute of a CUDA target.
  * When "arch" is present it is checked to be "sm_" followed by a number; otherwise it is
  * derived from the compute version of CUDA device 0, falling back to "sm_20" when
  * detection fails.
  * \param target The Target to update
  * \return The updated attributes
  */
 TargetJSON UpdateCUDAAttrs(TargetJSON target) {
   if (target.count("arch")) {
     // An explicitly given -arch only needs to be validated.
     String given_arch = Downcast<String>(target.at("arch"));
     const int parsed = ExtractIntWithPrefix(given_arch, "sm_");
     ICHECK(parsed != -1) << "ValueError: CUDA target gets an invalid CUDA arch: -arch=" << given_arch;
     return target;
   }
   // No -arch given: use the compute version of the first CUDA GPU instead.
   int sm_version = 20;
   TVMRetValue compute_version;
   if (DetectDeviceFlag({kDLCUDA, 0}, runtime::kComputeVersion, &compute_version)) {
     // The version string is scaled by 10 and truncated, e.g. "7.5" -> 75. The +0.1 presumably
     // guards against the product landing just below an integer.
     sm_version = static_cast<int>(std::stod(compute_version.operator std::string()) * 10 + 0.1);
   } else {
     LOG(WARNING) << "Unable to detect CUDA version, default to \"-arch=sm_20\" instead";
   }
   target.Set("arch", String("sm_") + std::to_string(sm_version));
   return target;
 }

 /*!
  * \brief Validate or fill in the attributes of an LLVM NVPTX target.
  * "mtriple" is set to (or required to equal) "nvptx64-nvidia-cuda". "mcpu" is validated when
  * present; otherwise it is derived from the compute version of CUDA device 0, falling back to
  * "sm_20" when detection fails.
  * \param target The Target to update
  * \return The updated attributes
  */
 TargetJSON UpdateNVPTXAttrs(TargetJSON target) {
   CheckOrSetAttr(&target, "mtriple", "nvptx64-nvidia-cuda");
   if (target.count("mcpu")) {
     // An explicitly given -mcpu only needs to be validated.
     String given_mcpu = Downcast<String>(target.at("mcpu"));
     const int parsed = ExtractIntWithPrefix(given_mcpu, "sm_");
     ICHECK(parsed != -1) << "ValueError: NVPTX target gets an invalid CUDA arch: -mcpu=" << given_mcpu;
     return target;
   }
   // No -mcpu given: use the compute version of the first CUDA GPU instead.
   int sm_version = 20;
   TVMRetValue compute_version;
   if (DetectDeviceFlag({kDLCUDA, 0}, runtime::kComputeVersion, &compute_version)) {
     // The version string is scaled by 10 and truncated, e.g. "7.5" -> 75.
     sm_version = static_cast<int>(std::stod(compute_version.operator std::string()) * 10 + 0.1);
   } else {
     LOG(WARNING) << "Unable to detect CUDA version, default to \"-mcpu=sm_20\" instead";
   }
   target.Set("mcpu", String("sm_") + std::to_string(sm_version));
   return target;
 }

 /*!
  * \brief Validate or fill in the attributes of an LLVM ROCm target.
  * "mtriple" is set to (or required to equal) "amdgcn-amd-amdhsa-hcc". "mcpu" is validated when
  * present, otherwise detected from ROCm device 0 (default "gfx900"). When the detected ROCm
  * API version is below 305, "-code-object-v3" is appended to "mattr".
  * \param target The Target to update
  * \return The updated attributes
  */
 TargetJSON UpdateROCmAttrs(TargetJSON target) {
   CheckOrSetAttr(&target, "mtriple", "amdgcn-amd-amdhsa-hcc");
   // Update -mcpu=gfx
   if (target.count("mcpu")) {
     String given_mcpu = Downcast<String>(target.at("mcpu"));
     const int parsed = ExtractIntWithPrefix(given_mcpu, "gfx");
     ICHECK(parsed != -1) << "ValueError: ROCm target gets an invalid GFX version: -mcpu=" << given_mcpu;
   } else {
     int gfx_arch = 900;
     TVMRetValue detected_arch;
     if (DetectDeviceFlag({kDLROCM, 0}, runtime::kGcnArch, &detected_arch)) {
       gfx_arch = detected_arch.operator int();
     } else {
       LOG(WARNING) << "Unable to detect ROCm compute arch, default to \"-mcpu=gfx900\" instead";
     }
     target.Set("mcpu", String("gfx") + std::to_string(gfx_arch));
   }

   // Update -mattr before ROCm 3.5:
   //   Before ROCm 3.5 we needed code object v2, starting
   //   with 3.5 we need v3 (this argument disables v3)
   int rocm_version = 305;
   TVMRetValue detected_version;
   if (DetectDeviceFlag({kDLROCM, 0}, runtime::kApiVersion, &detected_version)) {
     rocm_version = detected_version.operator int();
   } else {
     LOG(WARNING) << "Unable to detect ROCm version, assuming >= 3.5";
   }
   if (rocm_version >= 305) {
     return target;
   }
   Array<String> mattr;
   if (target.count("mattr")) {
     mattr = Downcast<Array<String>>(target.at("mattr"));
   }
   mattr.push_back("-code-object-v3");
   target.Set("mattr", mattr);
   return target;
 }

 /*!
  * \brief Target parser used by the "test" target kind.
  * Stores a "features" map containing {"is_test": true} on the target.
  * \param target The Target to update
  * \return The updated attributes
  */
 TargetJSON TestTargetParser(TargetJSON target) {
   Map<String, ObjectRef> test_features;
   test_features.Set("is_test", Bool(true));
   target.Set("features", test_features);
   return target;
 }

 /**********  Register Target kinds and attributes  **********/

 // "llvm" target kind on device type kDLCPU, tagged with the default key "cpu" and parsed by
 // tvm::target::parsers::cpu::ParseTarget.
 TVM_REGISTER_TARGET_KIND("llvm", kDLCPU)
     .add_attr_option<Array<String>>("mattr")
     .add_attr_option<String>("mcpu")
     .add_attr_option<String>("mtriple")
     .add_attr_option<String>("mfloat-abi")
     .add_attr_option<String>("mabi")
     .add_attr_option<Integer>("num-cores")
     // Fast math flags, see https://llvm.org/docs/LangRef.html#fast-math-flags
     .add_attr_option<Bool>("fast-math")  // implies all the below
     .add_attr_option<Bool>("fast-math-nnan")
     .add_attr_option<Bool>("fast-math-ninf")
     .add_attr_option<Bool>("fast-math-nsz")
     .add_attr_option<Bool>("fast-math-arcp")
     .add_attr_option<Bool>("fast-math-contract")
     .add_attr_option<Bool>("fast-math-reassoc")
     .add_attr_option<Integer>("opt-level")
     // LLVM command line flags, see below
     .add_attr_option<Array<String>>("cl-opt")
     .set_default_keys({"cpu"})
     // Force the external codegen kind attribute to be registered, even if no external
     // codegen targets are enabled by the TVM build.
     .set_attr<Bool>(tvm::attr::kIsExternalCodegen, Bool(false))
     .set_target_parser(tvm::target::parsers::cpu::ParseTarget);

 // Note regarding the "cl-opt" attribute:
 // Each string in the array has the format
 //   -optionname[[:type]=value]
 // where
 //   * optionname is the actual LLVM option (e.g. "unroll-threshold")
 //   * type is one of "bool", "int", "uint", or "string"
 //   * value is the corresponding option value (for "bool" type it can be 0 or "false"
 //     for false value, or 1 or "true" for true value)
 // If type is omitted, it is assumed to be "bool". If value is omitted, it is assumed
 // to be "true".
 //
 // The type must match the option type in LLVM. To find the type, search the LLVM
 // repository (https://github.com/llvm/llvm-project) for optionname, and look for
 // its definition: it will be a declaration of a variable of type cl::opt<T> with
 // optionname being an argument to the constructor. The T in the declaration is
 // the type.
 // For example, for unroll-threshold, we get the following declaration:
 // static cl::opt<unsigned>
 //     UnrollThreshold("unroll-threshold", cl::Hidden,
 //                     cl::desc("The cost threshold for loop unrolling"));
 // Hence the type is "uint".

 // "c" target kind on device type kDLCPU. It shares the default key "cpu" and the CPU target
 // parser with the "llvm" kind.
 TVM_REGISTER_TARGET_KIND("c", kDLCPU)
     .add_attr_option<String>("mcpu")
     .add_attr_option<String>("march")
     .add_attr_option<Integer>("workspace-byte-alignment")
     .add_attr_option<Integer>("constants-byte-alignment")
     .set_default_keys({"cpu"})
     .set_target_parser(tvm::target::parsers::cpu::ParseTarget);

 // "cuda" target kind on device type kDLCUDA. UpdateCUDAAttrs is installed as the target parser
 // and validates or fills in "arch". The second argument of add_attr_option is presumably the
 // option's default value.
 TVM_REGISTER_TARGET_KIND("cuda", kDLCUDA)
     .add_attr_option<String>("mcpu")
     .add_attr_option<String>("arch")
     .add_attr_option<Integer>("max_shared_memory_per_block")
     .add_attr_option<Integer>("max_threads_per_block")
     .add_attr_option<Integer>("thread_warp_size", Integer(32))
     .add_attr_option<Integer>("registers_per_block")
     .add_attr_option<Integer>("max_num_threads", Integer(1024))  // TODO(@zxybazh): deprecate it
     .set_default_keys({"cuda", "gpu"})
     .set_target_parser(UpdateCUDAAttrs);

 // "nvptx" target kind on device type kDLCUDA, using the same default keys as "cuda".
 // UpdateNVPTXAttrs is installed as the target parser and handles "mtriple" and "mcpu".
 TVM_REGISTER_TARGET_KIND("nvptx", kDLCUDA)
     .add_attr_option<String>("mcpu")
     .add_attr_option<String>("mtriple")
     .add_attr_option<Integer>("max_num_threads", Integer(1024))
     .add_attr_option<Integer>("thread_warp_size", Integer(32))
     .set_default_keys({"cuda", "gpu"})
     .set_target_parser(UpdateNVPTXAttrs);

 // "rocm" target kind on device type kDLROCM. UpdateROCmAttrs is installed as the target parser
 // and handles "mtriple", "mcpu" and "mattr".
 TVM_REGISTER_TARGET_KIND("rocm", kDLROCM)
     .add_attr_option<String>("mcpu")
     .add_attr_option<String>("mtriple")
     .add_attr_option<Array<String>>("mattr")
     // TODO(masahi): Support querying from a target device
     // On RDNA cards, thread_warp_size should be 32
     .add_attr_option<Integer>("max_num_threads", Integer(256))
     .add_attr_option<Integer>("max_threads_per_block", Integer(256))
     .add_attr_option<Integer>("max_shared_memory_per_block", Integer(65536))
     .add_attr_option<Integer>("thread_warp_size", Integer(64))
     .set_default_keys({"rocm", "gpu"})
     .set_target_parser(UpdateROCmAttrs);

 // "opencl" target kind on device type kDLOpenCL. No target parser is installed here.
 TVM_REGISTER_TARGET_KIND("opencl", kDLOpenCL)
     .add_attr_option<Integer>("max_num_threads", Integer(256))
     .add_attr_option<Integer>("thread_warp_size", Integer(1))
     .add_attr_option<Integer>("texture_spatial_limit", Integer(16384))
     .set_default_keys({"opencl", "gpu"});

 // The metal has some limitations on the number of input parameters. This is why attribute
 // `max_function_args` was introduced. It specifies the maximum number of kernel arguments. More
 // information about this limitation can be found here:
 // https://developer.apple.com/documentation/metal/buffers/about_argument_buffers?language=objc
 // See also https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
 // "metal" target kind on device type kDLMetal. "max_function_args" is registered with the
 // value 31 for the kernel-argument limit.
 TVM_REGISTER_TARGET_KIND("metal", kDLMetal)
     .add_attr_option<Integer>("max_num_threads", Integer(256))
     .add_attr_option<Integer>("max_threads_per_block", Integer(256))
     .add_attr_option<Integer>("max_shared_memory_per_block", Integer(32768))
     .add_attr_option<Integer>("thread_warp_size", Integer(16))
     .add_attr_option<Integer>("max_function_args", Integer(31))
     .set_default_keys({"metal", "gpu"});

 // "vulkan" target kind on device type kDLVulkan. The options are grouped into feature-support
 // flags, physical device limits and other device properties. Only a few are registered with an
 // explicit second argument (presumably their default value).
 TVM_REGISTER_TARGET_KIND("vulkan", kDLVulkan)
     .add_attr_option<Array<String>>("mattr")
     // Feature support
     .add_attr_option<Bool>("supports_float16")
     .add_attr_option<Bool>("supports_float32", Bool(true))
     .add_attr_option<Bool>("supports_float64")
     .add_attr_option<Bool>("supports_int8")
     .add_attr_option<Bool>("supports_int16")
     .add_attr_option<Bool>("supports_int32", Bool(true))
     .add_attr_option<Bool>("supports_int64")
     .add_attr_option<Bool>("supports_8bit_buffer")
     .add_attr_option<Bool>("supports_16bit_buffer")
     .add_attr_option<Bool>("supports_storage_buffer_storage_class")
     .add_attr_option<Bool>("supports_push_descriptor")
     .add_attr_option<Bool>("supports_dedicated_allocation")
     .add_attr_option<Bool>("supports_integer_dot_product")
     .add_attr_option<Integer>("supported_subgroup_operations")
     // Physical device limits
     .add_attr_option<Integer>("max_num_threads", Integer(256))
     .add_attr_option<Integer>("max_threads_per_block", Integer(256))
     .add_attr_option<Integer>("thread_warp_size", Integer(1))
     .add_attr_option<Integer>("max_block_size_x")
     .add_attr_option<Integer>("max_block_size_y")
     .add_attr_option<Integer>("max_block_size_z")
     .add_attr_option<Integer>("max_push_constants_size")
     .add_attr_option<Integer>("max_uniform_buffer_range")
     .add_attr_option<Integer>("max_storage_buffer_range")
     .add_attr_option<Integer>("max_per_stage_descriptor_storage_buffer")
     .add_attr_option<Integer>("max_shared_memory_per_block")
     // Other device properties
     .add_attr_option<String>("device_type")
     .add_attr_option<String>("device_name")
     .add_attr_option<String>("driver_name")
     .add_attr_option<Integer>("driver_version")
     .add_attr_option<Integer>("vulkan_api_version")
     .add_attr_option<Integer>("max_spirv_version")
     // Tags
     .set_default_keys({"vulkan", "gpu"});

 // "webgpu" target kind on device type kDLWebGPU; its only option is "max_num_threads".
 TVM_REGISTER_TARGET_KIND("webgpu", kDLWebGPU)
     .add_attr_option<Integer>("max_num_threads", Integer(256))
     .set_default_keys({"webgpu", "gpu"});

 // Target kinds tagged with the "hls" key. None of them registers any options.
 // "sdaccel" uses device type kDLOpenCL.
 TVM_REGISTER_TARGET_KIND("sdaccel", kDLOpenCL)  // line break
     .set_default_keys({"sdaccel", "hls"});

 // "aocl" uses device type kDLAOCL.
 TVM_REGISTER_TARGET_KIND("aocl", kDLAOCL)  // line break
     .set_default_keys({"aocl", "hls"});

 // "aocl_sw_emu" uses the same device type and default keys as "aocl".
 TVM_REGISTER_TARGET_KIND("aocl_sw_emu", kDLAOCL)  // line break
     .set_default_keys({"aocl", "hls"});

 // "hexagon" target kind on device type kDLHexagon. No target parser is installed here.
 TVM_REGISTER_TARGET_KIND("hexagon", kDLHexagon)
     .add_attr_option<Array<String>>("mattr")
     .add_attr_option<String>("mcpu")
     .add_attr_option<String>("mtriple")
     .add_attr_option<Array<String>>("llvm-options")
     .add_attr_option<Integer>("num-cores")
     .add_attr_option<Integer>("vtcm-capacity")
     .set_default_keys({"hexagon"});

 // Target kinds registered with only a name and a device type.
 TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU);

 TVM_REGISTER_TARGET_KIND("ext_dev", kDLExtDev);

 TVM_REGISTER_TARGET_KIND("hybrid", kDLCPU);

 // "composite" carries a list of Targets in its "devices" option.
 TVM_REGISTER_TARGET_KIND("composite", kDLCPU)  // line break
     .add_attr_option<Array<Target>>("devices");

 // "test" installs TestTargetParser, which stores a "features" map on the target.
 TVM_REGISTER_TARGET_KIND("test", kDLCPU)  // line break
     .set_target_parser(TestTargetParser);

 /**********  Registry  **********/

 // Look up attribute `attr_name` for `kind`. A default-constructed TVMRetValue is returned when
 // the attribute map has no entry for the kind.
 TVM_REGISTER_GLOBAL("target.TargetKindGetAttr")
     .set_body_typed([](TargetKind kind, String attr_name) -> TVMRetValue {
       auto attr_map = TargetKind::GetAttrMap<TVMRetValue>(attr_name);
       if (!attr_map.count(kind)) {
         return TVMRetValue();
       }
       return attr_map[kind];
     });
 // Expose the target kind listing helpers as global functions.
 TVM_REGISTER_GLOBAL("target.ListTargetKinds").set_body_typed(TargetKindRegEntry::ListTargetKinds);
 TVM_REGISTER_GLOBAL("target.ListTargetKindOptions")
     .set_body_typed(TargetKindRegEntry::ListTargetKindOptions);
 // Same listing as target.ListTargetKindOptions, with the kind looked up by name first.
 TVM_REGISTER_GLOBAL("target.ListTargetKindOptionsFromName")
     .set_body_typed([](String target_kind_name) {
       Optional<TargetKind> found = TargetKind::Get(target_kind_name);
       return TargetKindRegEntry::ListTargetKindOptions(found.value());
     });

 }  // namespace tvm
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	/*!
	* \file src/target/target_kind.cc
	* \brief Target kind registry
	*/
	#include <tvm/ir/expr.h>
	#include <tvm/runtime/device_api.h>
	#include <tvm/runtime/registry.h>
	#include <tvm/target/target.h>
	#include <tvm/target/target_kind.h>

	#include <algorithm>

	#include "../node/attr_registry.h"
	#include "./parsers/cpu.h"

	namespace tvm {

	// Register TargetKindNode as a node type.
	TVM_REGISTER_NODE_TYPE(TargetKindNode);

	// When a TargetKindNode is printed through ReprPrinter, only the kind's name is emitted.
	TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
	    .set_dispatch<TargetKindNode>([](const ObjectRef& node, ReprPrinter* printer) {
	      printer->stream << Downcast<TargetKind>(node)->name;
	    });

	/******** Registry-related code ********/

	// Shorthand for the AttrRegistry instantiation over TargetKindRegEntry / TargetKind.
	// The functions below reach the shared instance through TargetKindRegistry::Global().
	using TargetKindRegistry = AttrRegistry<TargetKindRegEntry, TargetKind>;

	/*!
	 * \brief List the names held by the global target kind registry.
	 * \return The result of ListAllNames() on the global registry
	 */
	Array<String> TargetKindRegEntry::ListTargetKinds() {
	  auto registry = TargetKindRegistry::Global();
	  return registry->ListAllNames();
	}

	/*!
	 * \brief Build a map from each option name of a target kind to the type key of its value.
	 * \param target_kind The target kind whose `key2vtype_` table is read
	 * \return A map with one entry per element of `key2vtype_`
	 */
	Map<String, String> TargetKindRegEntry::ListTargetKindOptions(const TargetKind& target_kind) {
	  Map<String, String> name_to_type;
	  for (const auto& option : target_kind->key2vtype_) {
	    const auto& option_name = option.first;
	    const auto& value_type = option.second;
	    name_to_type.Set(option_name, value_type.type_key);
	  }
	  return name_to_type;
	}

	/*!
	 * \brief Forward to RegisterOrGet() on the global target kind registry.
	 * \param target_kind_name The name of the target kind
	 * \return A reference to the registry entry for that name
	 */
	TargetKindRegEntry& TargetKindRegEntry::RegisterOrGet(const String& target_kind_name) {
	  auto registry = TargetKindRegistry::Global();
	  return registry->RegisterOrGet(target_kind_name);
	}

	/*!
	 * \brief Forward an attribute update for this entry's kind to the global registry.
	 * \param key The attribute key
	 * \param value The attribute value
	 * \param plevel The priority level passed through to the registry
	 */
	void TargetKindRegEntry::UpdateAttr(const String& key, TVMRetValue value, int plevel) {
	  auto registry = TargetKindRegistry::Global();
	  registry->UpdateAttr(key, kind_, value, plevel);
	}

	/*!
	 * \brief Fetch the attribute map container registered under the given attribute name.
	 * \param attr_name The attribute name
	 * \return The container returned by GetAttrMap() on the global registry
	 */
	const AttrRegistryMapContainerMap<TargetKind>& TargetKind::GetAttrMapContainer(
	    const String& attr_name) {
	  auto registry = TargetKindRegistry::Global();
	  return registry->GetAttrMap(attr_name);
	}

	/*!
	 * \brief Look up a target kind by name in the global registry.
	 * \param target_kind_name The name of the target kind
	 * \return The kind of the matching entry, or NullOpt when the registry has no such entry
	 */
	Optional<TargetKind> TargetKind::Get(const String& target_kind_name) {
	  const TargetKindRegEntry* entry = TargetKindRegistry::Global()->Get(target_kind_name);
	  if (entry != nullptr) {
	    return entry->kind_;
	  }
	  return NullOpt;
	}

	/******** Utility functions ********/

	/*!
	 * \brief Extract a non-negative number from the string with the given prefix.
	 * For example, when `str` is "sm_20" and `prefix` is "sm_", the result is 20.
	 * Everything after `prefix` must be a non-empty run of ASCII decimal digits
	 * whose value fits in an `int`.
	 * \param str The string to be extracted
	 * \param prefix The prefix to be checked
	 * \return An integer, the extracted number. -1 if `str` does not start with
	 *         `prefix`, has no digits after it, contains a non-digit character,
	 *         or encodes a value too large for `int`
	 */
	static int ExtractIntWithPrefix(const std::string& str, const std::string& prefix) {
	  // Compare in place instead of materializing a substring copy.
	  if (str.compare(0, prefix.size(), prefix) != 0) {
	    return -1;
	  }
	  // A bare prefix such as "sm_" carries no number at all.
	  if (str.size() == prefix.size()) {
	    return -1;
	  }
	  constexpr int kMaxInt = std::numeric_limits<int>::max();
	  int result = 0;
	  for (size_t i = prefix.size(); i < str.size(); ++i) {
	    const char c = str[i];
	    // Explicit range check: passing a negative `char` to isdigit() is undefined behavior.
	    if (c < '0' || c > '9') {
	      return -1;
	    }
	    const int digit = c - '0';
	    // Reject values that would overflow `int` (signed overflow is undefined behavior).
	    if (result > (kMaxInt - digit) / 10) {
	      return -1;
	    }
	    result = result * 10 + digit;
	  }
	  return result;
	}

	/*!
	 * \brief Using TVM DeviceAPI to detect the device flag
	 * \param device The device to be detected
	 * \param flag The device flag to be detected
	 * \param val The detected value. It also receives the result of the existence
	 *            query, so its content should not be relied upon when false is returned
	 * \return A boolean indicating if detection succeeds
	 */
	static bool DetectDeviceFlag(Device device, runtime::DeviceAttrKind flag, TVMRetValue* val) {
	  using runtime::DeviceAPI;
	  DeviceAPI* api = DeviceAPI::Get(device, true);
	  // Check if compiled with the corresponding device api
	  if (api == nullptr) {
	    return false;
	  }
	  // Check if the device exists
	  api->GetAttr(device, runtime::kExist, val);
	  const int exists = *val;
	  if (!exists) {
	    return false;
	  }
	  // Query the requested flag, reusing the API handle obtained above instead of
	  // looking it up a second time.
	  api->GetAttr(device, flag, val);
	  return true;
	}

	/*!
	 * \brief Set a string attribute if it is absent, otherwise require it to equal the given value.
	 * \param attrs The attribute map to inspect and possibly update
	 * \param name The attribute name
	 * \param value The value to set, or the value an existing entry must match
	 */
	void CheckOrSetAttr(Map<String, ObjectRef>* attrs, const String& name, const String& value) {
	  auto found = attrs->find(name);
	  if (found == attrs->end()) {
	    attrs->Set(name, value);
	    return;
	  }
	  // The attribute already exists: it must be a string identical to `value`.
	  const ObjectRef existing = (*found).second;
	  const auto* existing_str = existing.as<StringObj>();
	  ICHECK(existing_str != nullptr && GetRef<String>(existing_str) == value)
	      << "ValueError: Expects \"" << name << "\" to be \"" << value
	      << "\", but gets: " << existing;
	}

	/******** Target kind attribute updaters ********/

	/*!
	 * \brief Validate or fill in the "arch" attribute of a CUDA target.
	 * When "arch" is present it is checked to be "sm_" followed by a number; otherwise it is
	 * derived from the compute version of CUDA device 0, falling back to "sm_20" when
	 * detection fails.
	 * \param target The Target to update
	 * \return The updated attributes
	 */
	TargetJSON UpdateCUDAAttrs(TargetJSON target) {
	  if (target.count("arch")) {
	    // An explicitly given -arch only needs to be validated.
	    String given_arch = Downcast<String>(target.at("arch"));
	    const int parsed = ExtractIntWithPrefix(given_arch, "sm_");
	    ICHECK(parsed != -1) << "ValueError: CUDA target gets an invalid CUDA arch: -arch=" << given_arch;
	    return target;
	  }
	  // No -arch given: use the compute version of the first CUDA GPU instead.
	  int sm_version = 20;
	  TVMRetValue compute_version;
	  if (DetectDeviceFlag({kDLCUDA, 0}, runtime::kComputeVersion, &compute_version)) {
	    // The version string is scaled by 10 and truncated, e.g. "7.5" -> 75. The +0.1 presumably
	    // guards against the product landing just below an integer.
	    sm_version = static_cast<int>(std::stod(compute_version.operator std::string()) * 10 + 0.1);
	  } else {
	    LOG(WARNING) << "Unable to detect CUDA version, default to \"-arch=sm_20\" instead";
	  }
	  target.Set("arch", String("sm_") + std::to_string(sm_version));
	  return target;
	}

	/*!
	 * \brief Validate or fill in the attributes of an LLVM NVPTX target.
	 * "mtriple" is set to (or required to equal) "nvptx64-nvidia-cuda". "mcpu" is validated when
	 * present; otherwise it is derived from the compute version of CUDA device 0, falling back to
	 * "sm_20" when detection fails.
	 * \param target The Target to update
	 * \return The updated attributes
	 */
	TargetJSON UpdateNVPTXAttrs(TargetJSON target) {
	  CheckOrSetAttr(&target, "mtriple", "nvptx64-nvidia-cuda");
	  if (target.count("mcpu")) {
	    // An explicitly given -mcpu only needs to be validated.
	    String given_mcpu = Downcast<String>(target.at("mcpu"));
	    const int parsed = ExtractIntWithPrefix(given_mcpu, "sm_");
	    ICHECK(parsed != -1) << "ValueError: NVPTX target gets an invalid CUDA arch: -mcpu=" << given_mcpu;
	    return target;
	  }
	  // No -mcpu given: use the compute version of the first CUDA GPU instead.
	  int sm_version = 20;
	  TVMRetValue compute_version;
	  if (DetectDeviceFlag({kDLCUDA, 0}, runtime::kComputeVersion, &compute_version)) {
	    // The version string is scaled by 10 and truncated, e.g. "7.5" -> 75.
	    sm_version = static_cast<int>(std::stod(compute_version.operator std::string()) * 10 + 0.1);
	  } else {
	    LOG(WARNING) << "Unable to detect CUDA version, default to \"-mcpu=sm_20\" instead";
	  }
	  target.Set("mcpu", String("sm_") + std::to_string(sm_version));
	  return target;
	}

	/*!
	 * \brief Validate or fill in the attributes of an LLVM ROCm target.
	 * "mtriple" is set to (or required to equal) "amdgcn-amd-amdhsa-hcc". "mcpu" is validated when
	 * present, otherwise detected from ROCm device 0 (default "gfx900"). When the detected ROCm
	 * API version is below 305, "-code-object-v3" is appended to "mattr".
	 * \param target The Target to update
	 * \return The updated attributes
	 */
	TargetJSON UpdateROCmAttrs(TargetJSON target) {
	  CheckOrSetAttr(&target, "mtriple", "amdgcn-amd-amdhsa-hcc");
	  // Update -mcpu=gfx
	  if (target.count("mcpu")) {
	    String given_mcpu = Downcast<String>(target.at("mcpu"));
	    const int parsed = ExtractIntWithPrefix(given_mcpu, "gfx");
	    ICHECK(parsed != -1) << "ValueError: ROCm target gets an invalid GFX version: -mcpu=" << given_mcpu;
	  } else {
	    int gfx_arch = 900;
	    TVMRetValue detected_arch;
	    if (DetectDeviceFlag({kDLROCM, 0}, runtime::kGcnArch, &detected_arch)) {
	      gfx_arch = detected_arch.operator int();
	    } else {
	      LOG(WARNING) << "Unable to detect ROCm compute arch, default to \"-mcpu=gfx900\" instead";
	    }
	    target.Set("mcpu", String("gfx") + std::to_string(gfx_arch));
	  }

	  // Update -mattr before ROCm 3.5:
	  //   Before ROCm 3.5 we needed code object v2, starting
	  //   with 3.5 we need v3 (this argument disables v3)
	  int rocm_version = 305;
	  TVMRetValue detected_version;
	  if (DetectDeviceFlag({kDLROCM, 0}, runtime::kApiVersion, &detected_version)) {
	    rocm_version = detected_version.operator int();
	  } else {
	    LOG(WARNING) << "Unable to detect ROCm version, assuming >= 3.5";
	  }
	  if (rocm_version >= 305) {
	    return target;
	  }
	  Array<String> mattr;
	  if (target.count("mattr")) {
	    mattr = Downcast<Array<String>>(target.at("mattr"));
	  }
	  mattr.push_back("-code-object-v3");
	  target.Set("mattr", mattr);
	  return target;
	}

	/*!
	 * \brief Target parser used by the "test" target kind.
	 * Stores a "features" map containing {"is_test": true} on the target.
	 * \param target The Target to update
	 * \return The updated attributes
	 */
	TargetJSON TestTargetParser(TargetJSON target) {
	  Map<String, ObjectRef> test_features;
	  test_features.Set("is_test", Bool(true));
	  target.Set("features", test_features);
	  return target;
	}

	/******** Register Target kinds and attributes ********/

	// "llvm" target kind on device type kDLCPU, tagged with the default key "cpu" and parsed by
	// tvm::target::parsers::cpu::ParseTarget.
	TVM_REGISTER_TARGET_KIND("llvm", kDLCPU)
	.add_attr_option<Array<String>>("mattr")
	.add_attr_option<String>("mcpu")
	.add_attr_option<String>("mtriple")
	.add_attr_option<String>("mfloat-abi")
	.add_attr_option<String>("mabi")
	.add_attr_option<Integer>("num-cores")
	// Fast math flags, see https://llvm.org/docs/LangRef.html#fast-math-flags
	.add_attr_option<Bool>("fast-math") // implies all the below
	.add_attr_option<Bool>("fast-math-nnan")
	.add_attr_option<Bool>("fast-math-ninf")
	.add_attr_option<Bool>("fast-math-nsz")
	.add_attr_option<Bool>("fast-math-arcp")
	.add_attr_option<Bool>("fast-math-contract")
	.add_attr_option<Bool>("fast-math-reassoc")
	.add_attr_option<Integer>("opt-level")
	// LLVM command line flags, see below
	.add_attr_option<Array<String>>("cl-opt")
	.set_default_keys({"cpu"})
	// Force the external codegen kind attribute to be registered, even if no external
	// codegen targets are enabled by the TVM build.
	.set_attr<Bool>(tvm::attr::kIsExternalCodegen, Bool(false))
	.set_target_parser(tvm::target::parsers::cpu::ParseTarget);

	// Note regarding the "cl-opt" attribute:
	// Each string in the array has the format
	// -optionname[[:type]=value]
	// where
	// * optionname is the actual LLVM option (e.g. "unroll-threshold")
	// * type is one of "bool", "int", "uint", or "string"
	// * value is the corresponding option value (for "bool" type it can be 0 or "false"
	// for false value, or 1 or "true" for true value)
	// If type is omitted, it is assumed to be "bool". If value is omitted, it is assumed
	// to be "true".
	//
	// The type must match the option type in LLVM. To find the type, search the LLVM
	// repository (https://github.com/llvm/llvm-project) for optionname, and look for
	// its definition: it will be a declaration of a variable of type cl::opt<T> with
	// optionname being an argument to the constructor. The T in the declaration is
	// the type.
	// For example, for unroll-threshold, we get the following declaration:
	// static cl::opt<unsigned>
	// UnrollThreshold("unroll-threshold", cl::Hidden,
	// cl::desc("The cost threshold for loop unrolling"));
	// Hence the type is "uint".

	// "c" target kind on device type kDLCPU. It shares the default key "cpu" and the CPU target
	// parser with the "llvm" kind.
	TVM_REGISTER_TARGET_KIND("c", kDLCPU)
	.add_attr_option<String>("mcpu")
	.add_attr_option<String>("march")
	.add_attr_option<Integer>("workspace-byte-alignment")
	.add_attr_option<Integer>("constants-byte-alignment")
	.set_default_keys({"cpu"})
	.set_target_parser(tvm::target::parsers::cpu::ParseTarget);

	// "cuda" target kind on device type kDLCUDA. UpdateCUDAAttrs is installed as the target parser
	// and validates or fills in "arch". The second argument of add_attr_option is presumably the
	// option's default value.
	TVM_REGISTER_TARGET_KIND("cuda", kDLCUDA)
	.add_attr_option<String>("mcpu")
	.add_attr_option<String>("arch")
	.add_attr_option<Integer>("max_shared_memory_per_block")
	.add_attr_option<Integer>("max_threads_per_block")
	.add_attr_option<Integer>("thread_warp_size", Integer(32))
	.add_attr_option<Integer>("registers_per_block")
	.add_attr_option<Integer>("max_num_threads", Integer(1024)) // TODO(@zxybazh): deprecate it
	.set_default_keys({"cuda", "gpu"})
	.set_target_parser(UpdateCUDAAttrs);

	// "nvptx" target kind on device type kDLCUDA, using the same default keys as "cuda".
	// UpdateNVPTXAttrs is installed as the target parser and handles "mtriple" and "mcpu".
	TVM_REGISTER_TARGET_KIND("nvptx", kDLCUDA)
	.add_attr_option<String>("mcpu")
	.add_attr_option<String>("mtriple")
	.add_attr_option<Integer>("max_num_threads", Integer(1024))
	.add_attr_option<Integer>("thread_warp_size", Integer(32))
	.set_default_keys({"cuda", "gpu"})
	.set_target_parser(UpdateNVPTXAttrs);

	// "rocm" target kind on device type kDLROCM. UpdateROCmAttrs is installed as the target parser
	// and handles "mtriple", "mcpu" and "mattr".
	TVM_REGISTER_TARGET_KIND("rocm", kDLROCM)
	.add_attr_option<String>("mcpu")
	.add_attr_option<String>("mtriple")
	.add_attr_option<Array<String>>("mattr")
	// TODO(masahi): Support querying from a target device
	// On RDNA cards, thread_warp_size should be 32
	.add_attr_option<Integer>("max_num_threads", Integer(256))
	.add_attr_option<Integer>("max_threads_per_block", Integer(256))
	.add_attr_option<Integer>("max_shared_memory_per_block", Integer(65536))
	.add_attr_option<Integer>("thread_warp_size", Integer(64))
	.set_default_keys({"rocm", "gpu"})
	.set_target_parser(UpdateROCmAttrs);

	// "opencl" target kind on device type kDLOpenCL. No target parser is installed here.
	TVM_REGISTER_TARGET_KIND("opencl", kDLOpenCL)
	.add_attr_option<Integer>("max_num_threads", Integer(256))
	.add_attr_option<Integer>("thread_warp_size", Integer(1))
	.add_attr_option<Integer>("texture_spatial_limit", Integer(16384))
	.set_default_keys({"opencl", "gpu"});

	// The metal has some limitations on the number of input parameters. This is why attribute
	// `max_function_args` was introduced. It specifies the maximum number of kernel arguments. More
	// information about this limitation can be found here:
	// https://developer.apple.com/documentation/metal/buffers/about_argument_buffers?language=objc
	// See also https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
	// "metal" target kind on device type kDLMetal. "max_function_args" is registered with the
	// value 31 for the kernel-argument limit.
	TVM_REGISTER_TARGET_KIND("metal", kDLMetal)
	.add_attr_option<Integer>("max_num_threads", Integer(256))
	.add_attr_option<Integer>("max_threads_per_block", Integer(256))
	.add_attr_option<Integer>("max_shared_memory_per_block", Integer(32768))
	.add_attr_option<Integer>("thread_warp_size", Integer(16))
	.add_attr_option<Integer>("max_function_args", Integer(31))
	.set_default_keys({"metal", "gpu"});

	// "vulkan" target kind on device type kDLVulkan. The options are grouped into feature-support
	// flags, physical device limits and other device properties. Only a few are registered with an
	// explicit second argument (presumably their default value).
	TVM_REGISTER_TARGET_KIND("vulkan", kDLVulkan)
	.add_attr_option<Array<String>>("mattr")
	// Feature support
	.add_attr_option<Bool>("supports_float16")
	.add_attr_option<Bool>("supports_float32", Bool(true))
	.add_attr_option<Bool>("supports_float64")
	.add_attr_option<Bool>("supports_int8")
	.add_attr_option<Bool>("supports_int16")
	.add_attr_option<Bool>("supports_int32", Bool(true))
	.add_attr_option<Bool>("supports_int64")
	.add_attr_option<Bool>("supports_8bit_buffer")
	.add_attr_option<Bool>("supports_16bit_buffer")
	.add_attr_option<Bool>("supports_storage_buffer_storage_class")
	.add_attr_option<Bool>("supports_push_descriptor")
	.add_attr_option<Bool>("supports_dedicated_allocation")
	.add_attr_option<Bool>("supports_integer_dot_product")
	.add_attr_option<Integer>("supported_subgroup_operations")
	// Physical device limits
	.add_attr_option<Integer>("max_num_threads", Integer(256))
	.add_attr_option<Integer>("max_threads_per_block", Integer(256))
	.add_attr_option<Integer>("thread_warp_size", Integer(1))
	.add_attr_option<Integer>("max_block_size_x")
	.add_attr_option<Integer>("max_block_size_y")
	.add_attr_option<Integer>("max_block_size_z")
	.add_attr_option<Integer>("max_push_constants_size")
	.add_attr_option<Integer>("max_uniform_buffer_range")
	.add_attr_option<Integer>("max_storage_buffer_range")
	.add_attr_option<Integer>("max_per_stage_descriptor_storage_buffer")
	.add_attr_option<Integer>("max_shared_memory_per_block")
	// Other device properties
	.add_attr_option<String>("device_type")
	.add_attr_option<String>("device_name")
	.add_attr_option<String>("driver_name")
	.add_attr_option<Integer>("driver_version")
	.add_attr_option<Integer>("vulkan_api_version")
	.add_attr_option<Integer>("max_spirv_version")
	// Tags
	.set_default_keys({"vulkan", "gpu"});

	// "webgpu" target kind on device type kDLWebGPU; its only option is "max_num_threads".
	TVM_REGISTER_TARGET_KIND("webgpu", kDLWebGPU)
	.add_attr_option<Integer>("max_num_threads", Integer(256))
	.set_default_keys({"webgpu", "gpu"});

	// Target kinds tagged with the "hls" key. None of them registers any options.
	// "sdaccel" uses device type kDLOpenCL.
	TVM_REGISTER_TARGET_KIND("sdaccel", kDLOpenCL) // line break
	.set_default_keys({"sdaccel", "hls"});

	// "aocl" uses device type kDLAOCL.
	TVM_REGISTER_TARGET_KIND("aocl", kDLAOCL) // line break
	.set_default_keys({"aocl", "hls"});

	// "aocl_sw_emu" uses the same device type and default keys as "aocl".
	TVM_REGISTER_TARGET_KIND("aocl_sw_emu", kDLAOCL) // line break
	.set_default_keys({"aocl", "hls"});

	// "hexagon" target kind on device type kDLHexagon. No target parser is installed here.
	TVM_REGISTER_TARGET_KIND("hexagon", kDLHexagon)
	.add_attr_option<Array<String>>("mattr")
	.add_attr_option<String>("mcpu")
	.add_attr_option<String>("mtriple")
	.add_attr_option<Array<String>>("llvm-options")
	.add_attr_option<Integer>("num-cores")
	.add_attr_option<Integer>("vtcm-capacity")
	.set_default_keys({"hexagon"});

	// Target kinds registered with only a name and a device type.
	TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU);

	TVM_REGISTER_TARGET_KIND("ext_dev", kDLExtDev);

	TVM_REGISTER_TARGET_KIND("hybrid", kDLCPU);

	// "composite" carries a list of Targets in its "devices" option.
	TVM_REGISTER_TARGET_KIND("composite", kDLCPU) // line break
	.add_attr_option<Array<Target>>("devices");

	// "test" installs TestTargetParser, which stores a "features" map on the target.
	TVM_REGISTER_TARGET_KIND("test", kDLCPU) // line break
	.set_target_parser(TestTargetParser);

	/******** Registry ********/

	// Look up attribute `attr_name` for `kind`. A default-constructed TVMRetValue is returned when
	// the attribute map has no entry for the kind.
	TVM_REGISTER_GLOBAL("target.TargetKindGetAttr")
	    .set_body_typed([](TargetKind kind, String attr_name) -> TVMRetValue {
	      auto attr_map = TargetKind::GetAttrMap<TVMRetValue>(attr_name);
	      if (!attr_map.count(kind)) {
	        return TVMRetValue();
	      }
	      return attr_map[kind];
	    });
	// Expose the target kind listing helpers as global functions.
	TVM_REGISTER_GLOBAL("target.ListTargetKinds").set_body_typed(TargetKindRegEntry::ListTargetKinds);
	TVM_REGISTER_GLOBAL("target.ListTargetKindOptions")
	    .set_body_typed(TargetKindRegEntry::ListTargetKindOptions);
	// Same listing as target.ListTargetKindOptions, with the kind looked up by name first.
	TVM_REGISTER_GLOBAL("target.ListTargetKindOptionsFromName")
	    .set_body_typed([](String target_kind_name) {
	      Optional<TargetKind> found = TargetKind::Get(target_kind_name);
	      return TargetKindRegEntry::ListTargetKindOptions(found.value());
	    });

	} // namespace tvm