src/target/opt/build_cuda_on.cc - tvm - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 /*!
  *  Build cuda modules from source.
  *  requires cuda to be available.
  *
  * \file build_cuda.cc
  */
 #if defined(__linux__)
 #include <sys/stat.h>
 #endif
 #include <cuda_runtime.h>
 #include <nvrtc.h>

 #include <cstdlib>

 #include "../../runtime/cuda/cuda_common.h"
 #include "../../runtime/cuda/cuda_module.h"
 #include "../build_common.h"
 #include "../source/codegen_cuda.h"

 namespace tvm {
 namespace codegen {

 #define NVRTC_CALL(x)                                                                        \
   {                                                                                          \
     nvrtcResult result = x;                                                                  \
     if (result != NVRTC_SUCCESS) {                                                           \
       LOG(FATAL) << "NvrtcError: " #x " failed with error: " << nvrtcGetErrorString(result); \
     }                                                                                        \
   }

 std::string FindCUDAIncludePath() {
 #if defined(_WIN32)
   const std::string delimiter = "\\";
 #else
   const std::string delimiter = "/";
 #endif
   std::string cuda_include_path;
   const char* cuda_path_env = std::getenv("CUDA_PATH");
   if (cuda_path_env != nullptr) {
     cuda_include_path += cuda_path_env;
     cuda_include_path += delimiter + "include";
     return cuda_include_path;
   }

 #if defined(__linux__)
   struct stat st;
   cuda_include_path = "/usr/local/cuda/include";
   if (stat(cuda_include_path.c_str(), &st) == 0) {
     return cuda_include_path;
   }

   if (stat("/usr/include/cuda.h", &st) == 0) {
     return "/usr/include";
   }
 #endif
   LOG(FATAL) << "Cannot find cuda include path."
              << "CUDA_PATH is not set or CUDA is not installed in the default installation path."
              << "In other than linux, it is necessary to set CUDA_PATH.";
   return cuda_include_path;
 }

 std::string NVRTCCompile(const std::string& code, bool include_path = false) {
   std::vector<std::string> compile_params;
   std::vector<const char*> param_cstrings{};
   nvrtcProgram prog;
   std::string cc = "30";
   int major, minor;
   cudaError_t e1 = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, 0);
   cudaError_t e2 = cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, 0);

   if (e1 == cudaSuccess && e2 == cudaSuccess) {
     cc = std::to_string(major) + std::to_string(minor);
   } else {
     LOG(WARNING) << "cannot detect compute capability from your device, "
                  << "fall back to compute_30.";
   }

   compile_params.push_back("-arch=compute_" + cc);

   if (include_path) {
     std::string include_option = "--include-path=" + FindCUDAIncludePath();

     compile_params.push_back(include_option);
   }

   for (const auto& string : compile_params) {
     param_cstrings.push_back(string.c_str());
   }
   NVRTC_CALL(nvrtcCreateProgram(&prog, code.c_str(), nullptr, 0, nullptr, nullptr));
   nvrtcResult compile_res = nvrtcCompileProgram(prog, param_cstrings.size(), param_cstrings.data());

   size_t log_size;
   NVRTC_CALL(nvrtcGetProgramLogSize(prog, &log_size));
   std::string log;
   log.resize(log_size);
   NVRTC_CALL(nvrtcGetProgramLog(prog, &log[0]));
   ICHECK_EQ(compile_res, NVRTC_SUCCESS) << log;
   size_t ptx_size;
   NVRTC_CALL(nvrtcGetPTXSize(prog, &ptx_size));

   std::string ptx;
   ptx.resize(ptx_size);
   NVRTC_CALL(nvrtcGetPTX(prog, &ptx[0]));
   NVRTC_CALL(nvrtcDestroyProgram(&prog));

   return ptx;
 }

 runtime::Module BuildCUDA(IRModule mod, Target target) {
   using tvm::runtime::Registry;
   bool output_ssa = false;
   CodeGenCUDA cg;
   cg.Init(output_ssa);

   for (auto kv : mod->functions) {
     ICHECK(kv.second->IsInstance<PrimFuncNode>()) << "CodeGenCUDA: Can only take PrimFunc";
     auto f = Downcast<PrimFunc>(kv.second);
     auto calling_conv = f->GetAttr<Integer>(tvm::attr::kCallingConv);
     ICHECK(calling_conv == CallingConv::kDeviceKernelLaunch)
         << "CodeGenCUDA: expect calling_conv equals CallingConv::kDeviceKernelLaunch";
     cg.AddFunction(f);
   }

   std::string code = cg.Finish();

   if (const auto* f = Registry::Get("tvm_callback_cuda_postproc")) {
     code = (*f)(code).operator std::string();
   }
   std::string fmt = "ptx";
   std::string ptx;
   const auto* f_enter = Registry::Get("target.TargetEnterScope");
   (*f_enter)(target);
   if (const auto* f = Registry::Get("tvm_callback_cuda_compile")) {
     ptx = (*f)(code).operator std::string();
     // Dirty matching to check PTX vs cubin.
     // TODO(tqchen) more reliable checks
     if (ptx[0] != '/') fmt = "cubin";
   } else {
     ptx = NVRTCCompile(code, cg.need_include_path());
   }
   const auto* f_exit = Registry::Get("target.TargetExitScope");
   (*f_exit)(target);
   return CUDAModuleCreate(ptx, fmt, ExtractFuncInfo(mod), code);
 }

 TVM_REGISTER_GLOBAL("target.build.cuda").set_body_typed(BuildCUDA);
 }  // namespace codegen
 }  // namespace tvm
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	/*!
	* Build cuda modules from source.
	* requires cuda to be available.
	*
	* \file build_cuda.cc
	*/
	#if defined(__linux__)
	#include <sys/stat.h>
	#endif
	#include <cuda_runtime.h>
	#include <nvrtc.h>

	#include <cstdlib>

	#include "../../runtime/cuda/cuda_common.h"
	#include "../../runtime/cuda/cuda_module.h"
	#include "../build_common.h"
	#include "../source/codegen_cuda.h"

	namespace tvm {
	namespace codegen {

	#define NVRTC_CALL(x) \
	{ \
	nvrtcResult result = x; \
	if (result != NVRTC_SUCCESS) { \
	LOG(FATAL) << "NvrtcError: " #x " failed with error: " << nvrtcGetErrorString(result); \
	} \
	}

	std::string FindCUDAIncludePath() {
	#if defined(_WIN32)
	const std::string delimiter = "\\";
	#else
	const std::string delimiter = "/";
	#endif
	std::string cuda_include_path;
	const char* cuda_path_env = std::getenv("CUDA_PATH");
	if (cuda_path_env != nullptr) {
	cuda_include_path += cuda_path_env;
	cuda_include_path += delimiter + "include";
	return cuda_include_path;
	}

	#if defined(__linux__)
	struct stat st;
	cuda_include_path = "/usr/local/cuda/include";
	if (stat(cuda_include_path.c_str(), &st) == 0) {
	return cuda_include_path;
	}

	if (stat("/usr/include/cuda.h", &st) == 0) {
	return "/usr/include";
	}
	#endif
	LOG(FATAL) << "Cannot find cuda include path."
	<< "CUDA_PATH is not set or CUDA is not installed in the default installation path."
	<< "In other than linux, it is necessary to set CUDA_PATH.";
	return cuda_include_path;
	}

	std::string NVRTCCompile(const std::string& code, bool include_path = false) {
	std::vector<std::string> compile_params;
	std::vector<const char*> param_cstrings{};
	nvrtcProgram prog;
	std::string cc = "30";
	int major, minor;
	cudaError_t e1 = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, 0);
	cudaError_t e2 = cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, 0);

	if (e1 == cudaSuccess && e2 == cudaSuccess) {
	cc = std::to_string(major) + std::to_string(minor);
	} else {
	LOG(WARNING) << "cannot detect compute capability from your device, "
	<< "fall back to compute_30.";
	}

	compile_params.push_back("-arch=compute_" + cc);

	if (include_path) {
	std::string include_option = "--include-path=" + FindCUDAIncludePath();

	compile_params.push_back(include_option);
	}

	for (const auto& string : compile_params) {
	param_cstrings.push_back(string.c_str());
	}
	NVRTC_CALL(nvrtcCreateProgram(&prog, code.c_str(), nullptr, 0, nullptr, nullptr));
	nvrtcResult compile_res = nvrtcCompileProgram(prog, param_cstrings.size(), param_cstrings.data());

	size_t log_size;
	NVRTC_CALL(nvrtcGetProgramLogSize(prog, &log_size));
	std::string log;
	log.resize(log_size);
	NVRTC_CALL(nvrtcGetProgramLog(prog, &log[0]));
	ICHECK_EQ(compile_res, NVRTC_SUCCESS) << log;
	size_t ptx_size;
	NVRTC_CALL(nvrtcGetPTXSize(prog, &ptx_size));

	std::string ptx;
	ptx.resize(ptx_size);
	NVRTC_CALL(nvrtcGetPTX(prog, &ptx[0]));
	NVRTC_CALL(nvrtcDestroyProgram(&prog));

	return ptx;
	}

	runtime::Module BuildCUDA(IRModule mod, Target target) {
	using tvm::runtime::Registry;
	bool output_ssa = false;
	CodeGenCUDA cg;
	cg.Init(output_ssa);

	for (auto kv : mod->functions) {
	ICHECK(kv.second->IsInstance<PrimFuncNode>()) << "CodeGenCUDA: Can only take PrimFunc";
	auto f = Downcast<PrimFunc>(kv.second);
	auto calling_conv = f->GetAttr<Integer>(tvm::attr::kCallingConv);
	ICHECK(calling_conv == CallingConv::kDeviceKernelLaunch)
	<< "CodeGenCUDA: expect calling_conv equals CallingConv::kDeviceKernelLaunch";
	cg.AddFunction(f);
	}

	std::string code = cg.Finish();

	if (const auto* f = Registry::Get("tvm_callback_cuda_postproc")) {
	code = (*f)(code).operator std::string();
	}
	std::string fmt = "ptx";
	std::string ptx;
	const auto* f_enter = Registry::Get("target.TargetEnterScope");
	(*f_enter)(target);
	if (const auto* f = Registry::Get("tvm_callback_cuda_compile")) {
	ptx = (*f)(code).operator std::string();
	// Dirty matching to check PTX vs cubin.
	// TODO(tqchen) more reliable checks
	if (ptx[0] != '/') fmt = "cubin";
	} else {
	ptx = NVRTCCompile(code, cg.need_include_path());
	}
	const auto* f_exit = Registry::Get("target.TargetExitScope");
	(*f_exit)(target);
	return CUDAModuleCreate(ptx, fmt, ExtractFuncInfo(mod), code);
	}

	TVM_REGISTER_GLOBAL("target.build.cuda").set_body_typed(BuildCUDA);
	} // namespace codegen
	} // namespace tvm