blob: f958c1f8a0cf9ce6e803f9093f80b915353d41e1 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name
"""Utility to invoke nvcc compiler in the system"""
from __future__ import absolute_import as _abs
import subprocess
import os
import warnings
import tvm._ffi
from tvm.runtime import ndarray as nd
from . import util
from .._ffi.base import py_str
def compile_cuda(code, target="ptx", arch=None, options=None, path_target=None):
"""Compile cuda code with NVCC from env.
Parameters
----------
code : str
The cuda code.
target : str
The target format
arch : str
The architecture
options : str or list of str
The additional options
path_target : str, optional
Output file.
Return
------
cubin : bytearray
The bytearray of the cubin
"""
temp = util.tempdir()
if target not in ["cubin", "ptx", "fatbin"]:
raise ValueError("target must be in cubin, ptx, fatbin")
temp_code = temp.relpath("my_kernel.cu")
temp_target = temp.relpath("my_kernel.%s" % target)
with open(temp_code, "w") as out_file:
out_file.write(code)
if arch is None:
if nd.gpu(0).exist:
# auto detect the compute arch argument
arch = "sm_" + "".join(nd.gpu(0).compute_version.split("."))
else:
raise ValueError("arch(sm_xy) is not passed, and we cannot detect it from env")
file_target = path_target if path_target else temp_target
cmd = ["nvcc"]
cmd += ["--%s" % target, "-O3"]
if isinstance(arch, list):
cmd += arch
else:
cmd += ["-arch", arch]
if options:
if isinstance(options, str):
cmd += [options]
elif isinstance(options, list):
cmd += options
else:
raise ValueError("options must be str or list of str")
cmd += ["-o", file_target]
cmd += [temp_code]
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
(out, _) = proc.communicate()
if proc.returncode != 0:
msg = code
msg += "\nCompilation error:\n"
msg += py_str(out)
raise RuntimeError(msg)
data = bytearray(open(file_target, "rb").read())
if not data:
raise RuntimeError("Compilation error: empty result is generated")
return data
def find_cuda_path():
"""Utility function to find cuda path
Returns
-------
path : str
Path to cuda root.
"""
if "CUDA_PATH" in os.environ:
return os.environ["CUDA_PATH"]
cmd = ["which", "nvcc"]
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
(out, _) = proc.communicate()
out = py_str(out)
if proc.returncode == 0:
return os.path.realpath(os.path.join(str(out).strip(), "../.."))
cuda_path = "/usr/local/cuda"
if os.path.exists(os.path.join(cuda_path, "bin/nvcc")):
return cuda_path
raise RuntimeError("Cannot find cuda path")
def get_cuda_version(cuda_path):
"""Utility function to get cuda version
Parameters
----------
cuda_path : str
Path to cuda root.
Returns
-------
version : float
The cuda version
"""
version_file_path = os.path.join(cuda_path, "version.txt")
if not os.path.exists(version_file_path):
# Debian/Ubuntu repackaged CUDA path
version_file_path = os.path.join(cuda_path, "lib", "cuda", "version.txt")
try:
with open(version_file_path) as f:
version_str = f.readline().replace("\n", "").replace("\r", "")
return float(version_str.split(" ")[2][:2])
except:
raise RuntimeError("Cannot read cuda version file")
@tvm._ffi.register_func("tvm_callback_libdevice_path")
def find_libdevice_path(arch):
"""Utility function to find libdevice
Parameters
----------
arch : int
The compute architecture in int
Returns
-------
path : str
Path to libdevice.
"""
cuda_path = find_cuda_path()
lib_path = os.path.join(cuda_path, "nvvm/libdevice")
if not os.path.exists(lib_path):
# Debian/Ubuntu repackaged CUDA path
lib_path = os.path.join(cuda_path, "lib/nvidia-cuda-toolkit/libdevice")
selected_ver = 0
selected_path = None
cuda_ver = get_cuda_version(cuda_path)
if cuda_ver in (9.0, 9.1, 10.0, 10.1, 10.2, 11.0):
path = os.path.join(lib_path, "libdevice.10.bc")
else:
for fn in os.listdir(lib_path):
if not fn.startswith("libdevice"):
continue
ver = int(fn.split(".")[-3].split("_")[-1])
if selected_ver < ver <= arch:
selected_ver = ver
selected_path = fn
if selected_path is None:
raise RuntimeError("Cannot find libdevice for arch {}".format(arch))
path = os.path.join(lib_path, selected_path)
return path
def callback_libdevice_path(arch):
try:
return find_libdevice_path(arch)
except RuntimeError:
warnings.warn("Cannot find libdevice path")
return ""
def parse_compute_version(compute_version):
"""Parse compute capability string to divide major and minor version
Parameters
----------
compute_version : str
compute capability of a GPU (e.g. "6.0")
Returns
-------
major : int
major version number
minor : int
minor version number
"""
split_ver = compute_version.split(".")
try:
major = int(split_ver[0])
minor = int(split_ver[1])
return major, minor
except (IndexError, ValueError) as err:
raise RuntimeError("Compute version parsing error: " + str(err))
def have_fp16(compute_version):
"""Either fp16 support is provided in the compute capability or not
Parameters
----------
compute_version: str
compute capability of a GPU (e.g. "6.0")
"""
major, minor = parse_compute_version(compute_version)
# fp 16 support in reference to:
# https://docs.nvidia.com/cuda/cuda-c-programming-guide/#arithmetic-instructions
if major == 5 and minor == 3:
return True
if major >= 6:
return True
return False
def have_int8(compute_version):
"""Either int8 support is provided in the compute capability or not
Parameters
----------
compute_version : str
compute capability of a GPU (e.g. "6.1")
"""
major, _ = parse_compute_version(compute_version)
if major >= 6:
return True
return False
def have_tensorcore(compute_version):
"""Either TensorCore support is provided in the compute capability or not
Parameters
----------
compute_version : str
compute capability of a GPU (e.g. "7.0")
"""
major, _ = parse_compute_version(compute_version)
if major == 7:
return True
return False