| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # pylint: disable=invalid-name |
| """Utility to invoke nvcc compiler in the system""" |
| from __future__ import absolute_import as _abs |
| |
| import subprocess |
| import os |
| import warnings |
| |
| import tvm._ffi |
| from tvm.runtime import ndarray as nd |
| |
| from . import util |
| from .._ffi.base import py_str |
| |
| |
def compile_cuda(code, target="ptx", arch=None, options=None, path_target=None):
    """Compile cuda code with NVCC from env.

    Parameters
    ----------
    code : str
        The cuda code.

    target : str
        The target format, one of "cubin", "ptx" or "fatbin".

    arch : str or list of str, optional
        The architecture. A string is passed to nvcc as ``-arch <arch>``;
        a list is appended to the nvcc command line as-is. When None, the
        value is built as ``sm_XY`` from the compute version of GPU 0.

    options : str or list of str, optional
        The additional options

    path_target : str, optional
        Output file. When not given, the output is written next to the
        temporary source file created for the compilation.

    Return
    ------
    cubin : bytearray
        The bytearray of the compiled output

    Raises
    ------
    ValueError
        If ``target`` is not supported, ``options`` has an unsupported type,
        or ``arch`` is None and GPU 0 does not exist.
    RuntimeError
        If nvcc exits with a non-zero status or produces an empty output file.
    """
    # Validate the arguments before touching the file system so that bad
    # input does not leave a temporary directory and source file behind.
    if target not in ["cubin", "ptx", "fatbin"]:
        raise ValueError("target must be in cubin, ptx, fatbin")
    if options and not isinstance(options, (str, list)):
        raise ValueError("options must be str or list of str")

    temp = util.tempdir()
    temp_code = temp.relpath("my_kernel.cu")
    temp_target = temp.relpath("my_kernel.%s" % target)

    with open(temp_code, "w") as out_file:
        out_file.write(code)

    if arch is None:
        if nd.gpu(0).exist:
            # auto detect the compute arch argument
            arch = "sm_" + "".join(nd.gpu(0).compute_version.split("."))
        else:
            raise ValueError("arch(sm_xy) is not passed, and we cannot detect it from env")

    file_target = path_target if path_target else temp_target
    cmd = ["nvcc"]
    cmd += ["--%s" % target, "-O3"]
    if isinstance(arch, list):
        cmd += arch
    else:
        cmd += ["-arch", arch]

    if options:
        # The type was already checked above: either a single option or a list.
        cmd += [options] if isinstance(options, str) else options

    cmd += ["-o", file_target]
    cmd += [temp_code]

    # stderr is merged into stdout so that the diagnostics end up in `out`.
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    (out, _) = proc.communicate()

    if proc.returncode != 0:
        # Include the offending source in the message to ease debugging.
        msg = code
        msg += "\nCompilation error:\n"
        msg += py_str(out)
        raise RuntimeError(msg)

    # Use a context manager so the file handle is closed deterministically.
    with open(file_target, "rb") as result_file:
        data = bytearray(result_file.read())
    if not data:
        raise RuntimeError("Compilation error: empty result is generated")
    return data
| |
| |
def find_cuda_path():
    """Utility function to find cuda path

    The lookup order is: the ``CUDA_PATH`` environment variable, the
    location of ``nvcc`` as reported by ``which nvcc``, and finally the
    fixed location ``/usr/local/cuda``.

    Returns
    -------
    path : str
        Path to cuda root.

    Raises
    ------
    RuntimeError
        If none of the lookups succeeds.
    """
    if "CUDA_PATH" in os.environ:
        return os.environ["CUDA_PATH"]

    cmd = ["which", "nvcc"]
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        (out, _) = proc.communicate()
    except OSError:
        # `which` itself could not be launched; keep going so the remaining
        # fallback still runs and failures surface as RuntimeError below.
        proc = None

    if proc is not None and proc.returncode == 0:
        # `which` prints <root>/bin/nvcc, so the root is two levels up.
        nvcc_path = str(py_str(out)).strip()
        return os.path.realpath(os.path.join(nvcc_path, "../.."))

    cuda_path = "/usr/local/cuda"
    if os.path.exists(os.path.join(cuda_path, "bin/nvcc")):
        return cuda_path
    raise RuntimeError("Cannot find cuda path")
| |
| |
def get_cuda_version(cuda_path):
    """Utility function to get cuda version

    The version is read from the first line of ``version.txt`` under
    ``cuda_path`` (or under ``cuda_path/lib/cuda`` if the former does not
    exist). The third space-separated token of that line is taken as the
    version string.

    Parameters
    ----------
    cuda_path : str
        Path to cuda root.

    Returns
    -------
    version : float
        The cuda version. Only the first two characters of the version token
        are converted, so "10.1.243" yields 10.0 and "9.2.148" yields 9.0.

    Raises
    ------
    RuntimeError
        If the version file cannot be opened or its content cannot be parsed.
    """
    version_file_path = os.path.join(cuda_path, "version.txt")
    if not os.path.exists(version_file_path):
        # Debian/Ubuntu repackaged CUDA path
        version_file_path = os.path.join(cuda_path, "lib", "cuda", "version.txt")
    try:
        with open(version_file_path) as f:
            version_str = f.readline().replace("\n", "").replace("\r", "")
        # NOTE(review): the [:2] slice drops the minor version for two-digit
        # majors; it is kept because other code compares against this value.
        return float(version_str.split(" ")[2][:2])
    except (OSError, IndexError, ValueError) as err:
        # Only I/O and parsing failures are translated; interrupts and other
        # unrelated exceptions are allowed to propagate unchanged.
        raise RuntimeError("Cannot read cuda version file") from err
| |
| |
@tvm._ffi.register_func("tvm_callback_libdevice_path")
def find_libdevice_path(arch):
    """Utility function to find libdevice

    For the CUDA versions listed in the body a fixed file name
    (``libdevice.10.bc``) is used. Otherwise the libdevice directory is
    scanned and the file with the highest version number that does not
    exceed ``arch`` is selected.

    Parameters
    ----------
    arch : int
        The compute architecture in int

    Returns
    -------
    path : str
        Path to libdevice.

    Raises
    ------
    RuntimeError
        If the cuda path or version cannot be determined, the libdevice
        directory cannot be listed, or no suitable libdevice file is found.
    """
    cuda_path = find_cuda_path()
    lib_path = os.path.join(cuda_path, "nvvm/libdevice")
    if not os.path.exists(lib_path):
        # Debian/Ubuntu repackaged CUDA path
        lib_path = os.path.join(cuda_path, "lib/nvidia-cuda-toolkit/libdevice")

    cuda_ver = get_cuda_version(cuda_path)
    if cuda_ver in (9.0, 9.1, 10.0, 10.1, 10.2, 11.0):
        return os.path.join(lib_path, "libdevice.10.bc")

    try:
        candidates = os.listdir(lib_path)
    except OSError as err:
        # Report a missing/unreadable directory the same way as "no match"
        # so callers only have to deal with RuntimeError.
        raise RuntimeError("Cannot find libdevice for arch {}".format(arch)) from err

    selected_ver = 0
    selected_path = None
    for fn in candidates:
        if not fn.startswith("libdevice"):
            continue
        # The version is the text after the last "_" in the third-from-last
        # dot-separated field of the file name.
        try:
            ver = int(fn.split(".")[-3].split("_")[-1])
        except (IndexError, ValueError):
            # Name does not follow the expected pattern; ignore this file
            # instead of aborting the whole search.
            continue
        if selected_ver < ver <= arch:
            selected_ver = ver
            selected_path = fn
    if selected_path is None:
        raise RuntimeError("Cannot find libdevice for arch {}".format(arch))
    return os.path.join(lib_path, selected_path)
| |
| |
def callback_libdevice_path(arch):
    """Find libdevice, falling back to an empty string on failure.

    Parameters
    ----------
    arch : int
        The compute architecture in int

    Returns
    -------
    path : str
        Path to libdevice, or "" (after emitting a warning) when
        find_libdevice_path raises RuntimeError.
    """
    try:
        libdevice = find_libdevice_path(arch)
    except RuntimeError:
        warnings.warn("Cannot find libdevice path")
        libdevice = ""
    return libdevice
| |
| |
def parse_compute_version(compute_version):
    """Split a compute capability string into its major and minor numbers

    Parameters
    ----------
    compute_version : str
        compute capability of a GPU (e.g. "6.0")

    Returns
    -------
    major : int
        major version number
    minor : int
        minor version number

    Raises
    ------
    RuntimeError
        If the string has fewer than two dot-separated fields or one of the
        first two fields is not an integer.
    """
    fields = compute_version.split(".")
    try:
        # Only the first two fields are used; anything after them is ignored.
        version = (int(fields[0]), int(fields[1]))
    except (IndexError, ValueError) as err:
        raise RuntimeError("Compute version parsing error: " + str(err))
    return version
| |
| |
def have_fp16(compute_version):
    """Check whether fp16 support is provided in the compute capability

    Parameters
    ----------
    compute_version: str
        compute capability of a GPU (e.g. "6.0")

    Returns
    -------
    supported : bool
        True for compute capability 5.3 and for any major version >= 6.
    """
    major, minor = parse_compute_version(compute_version)
    # fp 16 support in reference to:
    # https://docs.nvidia.com/cuda/cuda-c-programming-guide/#arithmetic-instructions
    return (major == 5 and minor == 3) or major >= 6
| |
| |
def have_int8(compute_version):
    """Check whether int8 support is provided in the compute capability

    Parameters
    ----------
    compute_version : str
        compute capability of a GPU (e.g. "6.1")

    Returns
    -------
    supported : bool
        True when the major version is 6 or higher.
    """
    major_ver = parse_compute_version(compute_version)[0]
    return major_ver >= 6
| |
| |
def have_tensorcore(compute_version):
    """Either TensorCore support is provided in the compute capability or not

    Parameters
    ----------
    compute_version : str
        compute capability of a GPU (e.g. "7.0")

    Returns
    -------
    supported : bool
        True when the major version is 7 or higher.
    """
    major, _ = parse_compute_version(compute_version)
    # Tensor Cores were introduced with compute capability 7.0 and are also
    # present on later architectures, so 8.x and above must not be rejected.
    return major >= 7