# blob: d4e221d4bb61e9b1da85ec32a425d66deae1a752 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Directory make was started in, and the vendored third-party tree below it.
ROOTDIR = $(CURDIR)
TPARTYDIR = $(ROOTDIR)/3rdparty

# Host identification. When OS is Windows_NT only UNAME_S is set (to "Windows");
# otherwise the kernel name and processor type are read from uname.
ifneq ($(OS),Windows_NT)
  UNAME_S := $(shell uname -s)
  UNAME_P := $(shell uname -p)
else
  UNAME_S := Windows
endif
# Choose the configuration file unless `config` already has a value.
# Precedence: $(CXXNET_CONFIG), then ./config.mk when present, then make/config.mk.
ifndef config
  ifdef CXXNET_CONFIG
    config = $(CXXNET_CONFIG)
  else ifeq ("$(wildcard ./config.mk)","")
    config = make/config.mk
  else
    config = config.mk
  endif
endif
# Default locations of bundled dependencies. Each default applies only when the
# variable has no (non-empty) value yet.
ifndef DMLC_CORE
  DMLC_CORE = $(TPARTYDIR)/dmlc-core
endif

# Header files found two levels below dmlc-core's include directory.
CORE_INC = $(wildcard $(DMLC_CORE)/include/*/*.h)

ifndef NNVM_PATH
  NNVM_PATH = $(TPARTYDIR)/tvm/nnvm
endif

ifndef DLPACK_PATH
  DLPACK_PATH = $(ROOTDIR)/3rdparty/dlpack
endif

ifndef AMALGAMATION_PATH
  AMALGAMATION_PATH = $(ROOTDIR)/amalgamation
endif

ifndef TVM_PATH
  TVM_PATH = $(TPARTYDIR)/tvm
endif

# LLVM is looked up inside the TVM build tree by default.
ifndef LLVM_PATH
  LLVM_PATH = $(TVM_PATH)/build/llvm
endif
# Export NO_OPENMP=1 into the environment of recipes and sub-makes unless
# USE_OPENMP is 1 at this point.
# NOTE(review): this test is evaluated before $(config) is included on the
# line below, so a USE_OPENMP value that is only assigned inside the config
# file is not visible here (a command-line or environment value is) — confirm
# that this ordering is intended.
ifneq ($(USE_OPENMP), 1)
export NO_OPENMP = 1
endif
# use customized config file
include $(config)
# Turn MKLDNN on by default when the user expressed no preference and the host
# is an x86_64 machine that is neither Darwin nor Windows.
ifndef USE_MKLDNN
  ifeq ($(filter Darwin Windows,$(UNAME_S)),)
    ifeq ($(UNAME_P), x86_64)
      USE_MKLDNN=1
    endif
  endif
endif

# Legacy switch: USE_MKL2017 now simply forces USE_MKLDNN.
ifeq ($(USE_MKL2017), 1)
  $(warning "USE_MKL2017 is deprecated. We will switch to USE_MKLDNN.")
  USE_MKLDNN=1
endif

# Install prefix of the bundled MKLDNN build.
ifeq ($(USE_MKLDNN), 1)
  MKLDNNROOT = $(ROOTDIR)/3rdparty/mkldnn/build/install
endif
# Auto-enable intgemm on x86_64 when the user has not decided. It is enabled if
# the first word of `$(CXX) --version` is "clang" or "Apple", or if the major
# number reported by `$(CXX) -dumpversion` is at least 5.
ifndef USE_INTGEMM
  ifeq ($(UNAME_P), x86_64)
    COMPILER := $(shell $(CXX) --version |head -n 1 |cut -d " " -f 1)
    COMPILER_VERSION := $(shell $(CXX) -dumpversion |cut -d . -f 1)
    # If it's not clang and not Apple clang, it's probably gcc and we need at least 5.
    # gcc --version gives the name of the program it was called with, which makes it hard to detect.
    COMPILER_VERSION_GE_5 := $(shell expr $(COMPILER_VERSION) \>= 5)
    ifneq ($(filter clang Apple,$(COMPILER)),)
      USE_INTGEMM=1
    endif
    ifeq ($(COMPILER_VERSION_GE_5), 1)
      USE_INTGEMM=1
    endif
  endif
endif
# Build settings provided by the bundled mshadow and dmlc-core trees.
include $(TPARTYDIR)/mshadow/make/mshadow.mk
include $(DMLC_CORE)/make/dmlc.mk

# Warning options applied to every compile.
WARNFLAGS= -Wall -Wsign-compare

# Base compiler flags:
#   -DDMLC_MODERN_THREAD_LOCAL=0    use old thread local implementation in DMLC-CORE
#   -DDMLC_LOG_STACK_TRACE_SIZE=0   disable stack trace in exception by default
CFLAGS = -DMSHADOW_FORCE_STREAM $(WARNFLAGS) \
         -DDMLC_MODERN_THREAD_LOCAL=0 \
         -DDMLC_LOG_STACK_TRACE_SIZE=0 \
         -DDMLC_LOG_FATAL_THROW=1

# Developer mode: warnings become errors, except the categories listed.
ifeq ($(DEV), 1)
  # Excluded from Werror:
  # 1) variables used in '#pragma omp parallel' are considered unused
  CFLAGS += -g -Werror -Wno-error=unused-variable -Wno-error=maybe-uninitialized -Wno-error=unused-function
  NVCCFLAGS += -Werror cross-execution-space-call
endif

# Optimised build unless DEBUG=1 asks for an unoptimised one with assertions.
ifneq ($(DEBUG), 1)
  CFLAGS += -O3 -DNDEBUG=1
else
  CFLAGS += -g -O0 -D_GLIBCXX_ASSERTIONS
endif

# Include search path, position-independent code, and mshadow's own flags.
CFLAGS += -I$(TPARTYDIR)/mshadow/ -I$(TPARTYDIR)/dmlc-core/include -fPIC \
          -I$(NNVM_PATH)/include -I$(DLPACK_PATH)/include \
          -I$(TPARTYDIR)/tvm/include -Iinclude $(MSHADOW_CFLAGS)

LDFLAGS = -pthread -ldl $(MSHADOW_LDFLAGS) $(DMLC_LDFLAGS)
# Test coverage instrumentation.
# Note: enabling this can make the linker fail because of the amount of
# injected code; see https://github.com/apache/incubator-mxnet/issues/15971
ifeq ($(ENABLE_TESTCOVERAGE), 1)
  CFLAGS += --coverage
  LDFLAGS += --coverage
endif

# NVTX support.
ifeq ($(USE_NVTX), 1)
  CFLAGS += -DMXNET_USE_NVTX=1
  LDFLAGS += -lnvToolsExt
endif

# TensorRT support (links the ONNX and NVIDIA inference libraries).
ifeq ($(USE_TENSORRT), 1)
  CFLAGS += -I$(ROOTDIR) -I$(TPARTYDIR) -DONNX_NAMESPACE=$(ONNX_NAMESPACE) -DMXNET_USE_TENSORRT=1
  LDFLAGS += -lprotobuf -pthread -lonnx -lonnx_proto -lnvonnxparser -lnvonnxparser_runtime -lnvinfer -lnvinfer_plugin
endif
# -L/usr/local/lib

# nvcc flags: -O3 normally, "-g -G -O0" when DEBUG=1.
ifneq ($(DEBUG), 1)
  NVCCFLAGS += -std=c++11 -Xcompiler -D_FORCE_INLINES -O3 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
else
  NVCCFLAGS += -std=c++11 -Xcompiler -D_FORCE_INLINES -g -G -O0 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
endif

# CFLAGS for segfault logger
ifeq ($(USE_SIGNAL_HANDLER), 1)
  CFLAGS += -DMXNET_USE_SIGNAL_HANDLER=1
endif

# Caffe Plugin
ifdef CAFFE_PATH
  CFLAGS += -DMXNET_USE_CAFFE=1
endif

# Default value of LINT_LANG when none was given.
ifndef LINT_LANG
  LINT_LANG = "all"
endif
# MKLDNN: define the feature macro, add the header search paths, and make the
# static libdnnl archive a library dependency.
ifeq ($(USE_MKLDNN), 1)
  CFLAGS += -DMXNET_USE_MKLDNN=1 \
            -I$(ROOTDIR)/src/operator/nn/mkldnn/ \
            -I$(MKLDNNROOT)/include
  LIB_DEP += $(MKLDNNROOT)/lib/libdnnl.a
endif

# Use MKL's layernorm implementation. Only has an impact if MKL is compiled in.
ifeq ($(USE_MKL_LAYERNORM), 1)
  CFLAGS += -DMXNET_USE_MKL_LAYERNORM=1
endif
# OpenCV setup. With an explicit USE_OPENCV_INC_PATH (anything but NONE/empty)
# the matching USE_OPENCV_LIB_PATH is mandatory and is probed for the optional
# imgcodecs/highgui libraries; otherwise pkg-config supplies the flags,
# preferring the "opencv4" package over "opencv".
ifeq ($(USE_OPENCV), 1)
  CFLAGS += -DMXNET_USE_OPENCV=1
  ifneq ($(filter-out NONE, $(USE_OPENCV_INC_PATH)),)
    CFLAGS += -I$(USE_OPENCV_INC_PATH)/include
    ifeq ($(filter-out NONE, $(USE_OPENCV_LIB_PATH)),)
      $(error Please add the path of OpenCV shared library path into `USE_OPENCV_LIB_PATH`, when `USE_OPENCV_INC_PATH` is not NONE)
    endif
    LDFLAGS += -L$(USE_OPENCV_LIB_PATH)
    ifneq ($(wildcard $(USE_OPENCV_LIB_PATH)/libopencv_imgcodecs.*),)
      LDFLAGS += -lopencv_imgcodecs
    endif
    ifneq ($(wildcard $(USE_OPENCV_LIB_PATH)/libopencv_highgui.*),)
      LDFLAGS += -lopencv_highgui
    endif
  else
    ifneq ("$(shell pkg-config --exists opencv4; echo $$?)", "0")
      OPENCV_LIB = opencv
    else
      OPENCV_LIB = opencv4
    endif
    CFLAGS += $(shell pkg-config --cflags $(OPENCV_LIB))
    LDFLAGS += $(shell pkg-config --libs-only-L $(OPENCV_LIB))
    # Only take imgcodecs/highgui from pkg-config's library list.
    LDFLAGS += $(filter -lopencv_imgcodecs -lopencv_highgui, $(shell pkg-config --libs-only-l $(OPENCV_LIB)))
  endif
  LDFLAGS += -lopencv_imgproc -lopencv_core
  # im2rec is only built when OpenCV is enabled.
  BIN += bin/im2rec
else
  CFLAGS += -DMXNET_USE_OPENCV=0
endif
# OpenMP
ifeq ($(USE_OPENMP), 1)
  CFLAGS += -fopenmp -DMXNET_USE_OPENMP=1
endif

# NNPACK
ifeq ($(USE_NNPACK), 1)
  CFLAGS += -DMXNET_USE_NNPACK=1
  LDFLAGS += -lnnpack
endif

# Operator tuning
ifeq ($(USE_OPERATOR_TUNING), 1)
  CFLAGS += -DMXNET_USE_OPERATOR_TUNING=1
endif

# MSHADOW_INT64_TENSOR_SIZE is always defined: 1 when requested, 0 otherwise.
ifneq ($(USE_INT64_TENSOR_SIZE), 1)
  CFLAGS += -DMSHADOW_INT64_TENSOR_SIZE=0
else
  CFLAGS += -DMSHADOW_INT64_TENSOR_SIZE=1
endif
# Verify that a separate lapack library exists when using blas/openblas/atlas/mkl
# and switch lapack support off when none can be found. Cases covered:
# - for Ubuntu 14.04 or lower, lapack is not automatically installed with openblas
# - for Ubuntu, installing atlas will not automatically install the atlas provided lapack library
# - for rhel7.2, installing the package `lapack-static` via yum will dismiss this warning.
# Lapack is switched off (with a warning) instead of failing the build, for
# backward compatibility.
ifeq ($(USE_LAPACK), 1)
  ifeq ($(USE_BLAS),$(filter $(USE_BLAS),blas openblas atlas mkl))
    # Empty only if none of the candidate library files exists.
    ifeq (,$(strip $(wildcard \
        $(USE_LAPACK_PATH)/liblapack.a \
        $(USE_LAPACK_PATH)/liblapack.so \
        $(USE_LAPACK_PATH)/liblapack.dylib \
        /lib/liblapack.a \
        /lib/liblapack.so \
        /usr/lib/liblapack.a \
        /usr/lib/liblapack.so \
        /usr/lib/liblapack.dylib \
        /usr/lib64/liblapack.a \
        /usr/lib64/liblapack.so)))
      USE_LAPACK = 0
      $(warning "USE_LAPACK disabled because libraries were not found")
    endif
  endif
endif

# lapack settings.
ifeq ($(USE_LAPACK), 1)
  ifneq ($(USE_LAPACK_PATH), )
    LDFLAGS += -L$(USE_LAPACK_PATH)
  endif
  ifeq ($(USE_BLAS),$(filter $(USE_BLAS),blas openblas atlas mkl))
    LDFLAGS += -llapack
  endif
  CFLAGS += -DMXNET_USE_LAPACK
endif
# cuDNN
ifeq ($(USE_CUDNN), 1)
  CFLAGS += -DMSHADOW_USE_CUDNN=1
  LDFLAGS += -lcudnn
endif

# Tell the sources which BLAS flavour was selected; any other USE_BLAS value
# adds no macro here.
ifeq ($(USE_BLAS), openblas)
  CFLAGS += -DMXNET_USE_BLAS_OPEN=1
else ifeq ($(USE_BLAS), atlas)
  CFLAGS += -DMXNET_USE_BLAS_ATLAS=1
else ifeq ($(USE_BLAS), mkl)
  CFLAGS += -DMXNET_USE_BLAS_MKL=1
else ifeq ($(USE_BLAS), apple)
  CFLAGS += -DMXNET_USE_BLAS_APPLE=1
endif
# Whether to use the F16C instruction set extension for fast fp16 compute on CPU.
# When cross compiling you may want to turn it off explicitly if the target
# system does not support it.
# Unless USE_F16C is preset, non-Windows hosts set it to 1 when the CPU feature
# list (sysctl on Darwin, /proc/cpuinfo on Linux) mentions F16C, and to 0 otherwise.
ifndef USE_F16C
  ifneq ($(OS),Windows_NT)
    detected_OS := $(shell uname -s)
    ifeq ($(detected_OS),Darwin)
      F16C_SUPP = $(shell sysctl -a | grep machdep.cpu.features | grep F16C)
    endif
    ifeq ($(detected_OS),Linux)
      F16C_SUPP = $(shell cat /proc/cpuinfo | grep flags | grep f16c)
    endif
    ifeq ($(strip $(F16C_SUPP)),)
      USE_F16C=0
    else
      USE_F16C=1
    endif
  endif
  # On Windows nothing is detected: check yourself whether processor and
  # compiler support F16C. One way to check the processor is the tool
  # https://docs.microsoft.com/en-us/sysinternals/downloads/coreinfo.
  # If `coreinfo -c` shows F16C and the compiler supports it, set USE_F16C=1
  # explicitly to use that capability.
endif
# gperftools malloc library (tcmalloc)
# The library file (.so, or .a when USE_GPERFTOOLS_STATIC=1) is searched in
# USE_GPERFTOOLS_PATH and then a fixed list of system directories; the first
# existing candidate wins. If none exists, USE_GPERFTOOLS is reset to 0.
ifeq ($(USE_GPERFTOOLS), 1)
  FIND_LIBFILEEXT=so
  ifeq ($(USE_GPERFTOOLS_STATIC), 1)
    FIND_LIBFILEEXT=a
  endif
  FIND_LIBFILE=$(firstword $(foreach candidate, \
      $(USE_GPERFTOOLS_PATH)/libtcmalloc.$(FIND_LIBFILEEXT) \
      /lib/libtcmalloc.$(FIND_LIBFILEEXT) \
      /usr/lib/libtcmalloc.$(FIND_LIBFILEEXT) \
      /usr/local/lib/libtcmalloc.$(FIND_LIBFILEEXT) \
      /usr/lib64/libtcmalloc.$(FIND_LIBFILEEXT), \
      $(wildcard $(candidate))))
  ifeq (,$(FIND_LIBFILE))
    USE_GPERFTOOLS=0
  endif
  ifeq ($(USE_GPERFTOOLS), 1)
    CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
    LDFLAGS += $(FIND_LIBFILE)
  endif
# jemalloc malloc library (only considered when gperftools was not requested)
else
  ifeq ($(USE_JEMALLOC), 1)
    FIND_LIBFILEEXT=so
    ifeq ($(USE_JEMALLOC_STATIC), 1)
      FIND_LIBFILEEXT=a
    endif
    FIND_LIBFILE=$(firstword $(foreach candidate, \
        $(USE_JEMALLOC_PATH)/libjemalloc.$(FIND_LIBFILEEXT) \
        /lib/libjemalloc.$(FIND_LIBFILEEXT) \
        /usr/lib/libjemalloc.$(FIND_LIBFILEEXT) \
        /usr/local/lib/libjemalloc.$(FIND_LIBFILEEXT) \
        /usr/lib/x86_64-linux-gnu/libjemalloc.$(FIND_LIBFILEEXT) \
        /usr/lib64/libjemalloc.$(FIND_LIBFILEEXT), \
        $(wildcard $(candidate))))
    ifeq (,$(FIND_LIBFILE))
      USE_JEMALLOC=0
    endif
    ifeq ($(USE_JEMALLOC), 1)
      CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc \
                -fno-builtin-free -DUSE_JEMALLOC
      LDFLAGS += $(FIND_LIBFILE)
    endif
  endif
endif

# If not using tcmalloc or jemalloc, print a warning (user should consider installing)
ifneq ($(USE_GPERFTOOLS), 1)
  ifneq ($(USE_JEMALLOC), 1)
    $(warning WARNING: Significant performance increases can be achieved by installing and \
      enabling gperftools or jemalloc development packages)
  endif
endif
# Threaded engine
ifeq ($(USE_THREADED_ENGINE), 1)
  CFLAGS += -DMXNET_USE_THREADED_ENGINE
endif

# User-supplied extra flags; the literal value NONE means "nothing to add".
ifneq ($(ADD_CFLAGS), NONE)
  CFLAGS += $(ADD_CFLAGS)
endif
ifneq ($(ADD_LDFLAGS), NONE)
  LDFLAGS += $(ADD_LDFLAGS)
endif

# If NVCC has not been manually defined (it is NONE), use the nvcc found in the
# bin directory of USE_CUDA_PATH, provided that path is not NONE either.
ifeq ($(NVCC), NONE)
  ifneq ($(USE_CUDA_PATH), NONE)
    NVCC=$(USE_CUDA_PATH)/bin/nvcc
  endif
endif
# Guard against displaying nvcc info messages to users not using CUDA.
ifeq ($(USE_CUDA), 1)
  # Get the version of `ar` and order it against the expected minimum using
  # version sort: LARGE_VERSION is the greater of the two, SMALL_VERSION the lesser.
  # `grep -E` replaces the deprecated `egrep`.
  AR_VERSION := $(shell ar --version | grep -Eo "([0-9]{1,}\.)+[0-9]{1,}")
  EXPECTED_AR_VERSION := 2.27
  LARGE_VERSION := $(shell printf '%s\n' "$(AR_VERSION)" "$(EXPECTED_AR_VERSION)" | sort -V | tail -n 1)
  SMALL_VERSION := $(shell printf '%s\n' "$(AR_VERSION)" "$(EXPECTED_AR_VERSION)" | sort -V | head -n 1)
  # If NVCC is not at the location specified, use CUDA_PATH instead.
  ifeq ("$(wildcard $(NVCC))","")
    ifneq ($(USE_CUDA_PATH), NONE)
      NVCC=$(USE_CUDA_PATH)/bin/nvcc
      # If the larger version is the expected one and larger != smaller, the
      # installed ar is older than expected and the user needs to be warned.
      # NOTE(review): this check only runs inside the "nvcc not found at $(NVCC)"
      # branch — confirm that the ar warning is not wanted for every CUDA build.
      ifeq ($(LARGE_VERSION), $(EXPECTED_AR_VERSION))
        ifneq ($(LARGE_VERSION), $(SMALL_VERSION))
# `n` must hold a single newline so that $n breaks the warning into lines; the
# two blank lines between define and endef are what produce it.
define n


endef
          $(warning WARNING: Archive utility: ar version being used is less than 2.27.0. $n \
            Note that with USE_CUDA=1 flag and USE_CUDNN=1 this is known to cause problems. $n \
            For more info see: https://github.com/apache/incubator-mxnet/issues/15084)
          # Pause so that the warning is noticed before build output scrolls by.
          $(shell sleep 5)
        endif
      endif
      $(info INFO: nvcc was not found on your path)
      $(info INFO: Using $(NVCC) as nvcc path)
    else
      $(warning WARNING: could not find nvcc compiler, the specified path was: $(NVCC))
    endif
  endif
endif
# Sets 'CUDA_ARCH', which determines the GPU architectures supported by the
# compiled kernels. Users can trim the KNOWN_CUDA_ARCHS list below to drop
# archs they do not need (faster compilation), or pre-set CUDA_ARCH in
# config.mk to a non-null value for full control.
#
# For archs in this list, nvcc creates a fat-binary that includes the binaries
# (SASS) for all architectures supported by the installed cuda toolkit, plus
# the assembly (PTX) for the most recent such architecture. If these kernels
# are then run on a newer-architecture GPU, the binary will be JIT-compiled by
# the updated driver from the included PTX.
ifeq ($(USE_CUDA), 1)
  ifeq ($(CUDA_ARCH),)
    KNOWN_CUDA_ARCHS := 30 35 50 52 60 61 70 75 80
    # Run nvcc on a zero-length file to check architecture-level support, and
    # emit a -gencode argument (SASS) for each level that nvcc accepts.
    CUDA_ARCH := $(foreach arch,$(KNOWN_CUDA_ARCHS), \
        $(shell $(NVCC) -arch=sm_$(arch) -E --x cu /dev/null >/dev/null 2>&1 && \
                echo -gencode arch=compute_$(arch),code=sm_$(arch)))
    # Convert a trailing "code=sm_NN" to "code=[sm_NN,compute_NN]" to also
    # include the PTX of the most recent arch in the fat-binaries for
    # forward compatibility with newer GPUs.
    CUDA_ARCH := $(shell echo $(CUDA_ARCH) | sed 's/sm_\([0-9]*\)$$/[sm_\1,compute_\1]/')
    # Add fat binary compression if supported by nvcc.
    COMPRESS := --fatbin-options -compress-all
    CUDA_ARCH += $(shell $(NVCC) -cuda $(COMPRESS) --x cu /dev/null -o /dev/null >/dev/null 2>&1 && \
        echo $(COMPRESS))
  endif
  $(info Running CUDA_ARCH: $(CUDA_ARCH))
endif
# ps-lite: location of the bundled sources, directory holding its dependencies,
# and the makefile fragment it ships.
PS_PATH=$(ROOTDIR)/3rdparty/ps-lite
DEPS_PATH=$(shell pwd)/deps
include $(PS_PATH)/make/ps.mk

# Distributed KVStore: compile against ps-lite and link its static library.
ifeq ($(USE_DIST_KVSTORE), 1)
  CFLAGS += -DMXNET_USE_DIST_KVSTORE -I$(PS_PATH)/include -I$(DEPS_PATH)/include
  LIB_DEP += $(PS_PATH)/build/libps.a
  LDFLAGS += $(PS_LDFLAGS_A)
endif
# Targets that name actions rather than files. Declaring them phony keeps a
# stray file or directory of the same name from silently disabling the target.
.PHONY: clean all extra-packages test lint clean_all rcpplint rcppexport roxygen \
        cython3 cython cyclean \
        cpplint pylint jnilint extension_libs rclean ratcheck \
        scalaclean scalapkg scalainstall scalaunittest scalaintegrationtest

# Default goal: static and shared library, optional binaries, extra packages
# and the example extension libraries.
all: lib/libmxnet.a lib/libmxnet.so $(BIN) extra-packages extension_libs

# C++ sources, one to four directory levels below src/.
SRC = $(wildcard src/*/*/*/*.cc src/*/*/*.cc src/*/*.cc src/*.cc)
ifeq ($(USE_INTGEMM), 1)
  ifndef INTGEMM_PATH
    INTGEMM_PATH = build/3rdparty/intgemm
  endif
  CFLAGS += -DMXNET_USE_INTGEMM=1
  LIB_DEP += $(INTGEMM_PATH)/libintgemm.a

# Download intgemm if it isn't already: clone into a fresh directory and pin
# the checkout to a fixed commit.
$(INTGEMM_PATH):
	@mkdir -p $@
	rm -rf $@
	git clone https://github.com/kpu/intgemm $@
	cd $@ && git checkout -q 4172dcc209e6793dd920dec9cf9c9fc81605bd9d

# These files come out of the clone above; the recipe is intentionally empty.
$(INTGEMM_PATH)/compile_test_avx512bw.cc \
$(INTGEMM_PATH)/compile_test_avx512vnni.cc \
$(INTGEMM_PATH)/intgemm/intgemm.cc: $(INTGEMM_PATH)
	@

# Compiler tests for AVX512BW and AVX512VNNI. The generated header starts with
# '#pragma once' and gains one #define per test program that compiles.
$(INTGEMM_PATH)/intgemm/intgemm_config.h: $(INTGEMM_PATH)/compile_test_avx512bw.cc $(INTGEMM_PATH)/compile_test_avx512vnni.cc
	echo '#pragma once' >$@
	$(CXX) $(CFLAGS) $(INTGEMM_PATH)/compile_test_avx512bw.cc 2>/dev/null && echo \#define INTGEMM_COMPILER_SUPPORTS_AVX512BW >>$@ || echo Your compiler is missing AVX512BW support
	$(CXX) $(CFLAGS) $(INTGEMM_PATH)/compile_test_avx512vnni.cc 2>/dev/null && echo \#define INTGEMM_COMPILER_SUPPORTS_AVX512VNNI >>$@ || echo Your compiler is missing AVX512VNNI support

# Single object file of the library; rebuilt when the config header, the source
# or any intgemm header found at parse time changes.
$(INTGEMM_PATH)/intgemm/intgemm.o: $(INTGEMM_PATH)/intgemm/intgemm_config.h $(INTGEMM_PATH)/intgemm/intgemm.cc $(wildcard $(INTGEMM_PATH)/intgemm/*.h $(INTGEMM_PATH)/intgemm/*/*.h)
	$(CXX) $(CFLAGS) -I$(INTGEMM_PATH) -std=c++11 -c $(INTGEMM_PATH)/intgemm/intgemm.cc -o $@

# Static archive; only objects newer than the archive ($?) are (re)inserted.
$(INTGEMM_PATH)/libintgemm.a: $(INTGEMM_PATH)/intgemm/intgemm.o
	@mkdir -p $(@D)
	ar crv $@ $(filter %.o, $?)
else
  # If we're not using intgemm, remove its operators from SRC.
  INTGEMM_OPS := $(wildcard src/operator/contrib/intgemm/*.cc)
  SRC := $(filter-out $(INTGEMM_OPS),$(SRC))
endif
# Object files mirror the source tree under build/; CUDA objects get a _gpu suffix.
OBJ = $(patsubst %.cc, build/%.o, $(SRC))
CUSRC = $(wildcard src/*/*/*/*.cu src/*/*/*.cu src/*/*.cu src/*.cu)
CUOBJ = $(patsubst %.cu, build/%_gpu.o, $(CUSRC))

# TVM operators: depend on the TVM runtime and operator libraries and link the
# runtime with an $ORIGIN-relative rpath. TVM_USE_CUDA is OFF without CUDA, ON
# with CUDA, or the CUDA path itself when USE_CUDA_PATH is not NONE.
ifeq ($(USE_TVM_OP), 1)
  LIB_DEP += lib/libtvm_runtime.so lib/libtvmop.so
  CFLAGS += -I$(TVM_PATH)/include -DMXNET_USE_TVM_OP=1
  LDFLAGS += -L$(ROOTDIR)/lib -ltvm_runtime -Wl,-rpath,'$${ORIGIN}'
  TVM_USE_CUDA := OFF
  ifeq ($(USE_CUDA), 1)
    TVM_USE_CUDA := ON
    ifneq ($(USE_CUDA_PATH), NONE)
      TVM_USE_CUDA := $(USE_CUDA_PATH)
    endif
  endif
endif

# extra operators: sources one and two levels below each listed directory;
# their objects are placed next to the sources.
ifeq ($(EXTRA_OPERATORS),)
  EXTRA_SRC =
  EXTRA_OBJ =
  EXTRA_CUSRC =
  EXTRA_CUOBJ =
else
  EXTRA_SRC = $(wildcard $(patsubst %, %/*.cc, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.cc, $(EXTRA_OPERATORS)))
  EXTRA_OBJ = $(patsubst %.cc, %.o, $(EXTRA_SRC))
  EXTRA_CUSRC = $(wildcard $(patsubst %, %/*.cu, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.cu, $(EXTRA_OPERATORS)))
  EXTRA_CUOBJ = $(patsubst %.cu, %_gpu.o, $(EXTRA_CUSRC))
endif
# plugin: start with empty object lists, then read the makefile fragments
# named in MXNET_PLUGINS.
PLUGIN_OBJ =
PLUGIN_CUOBJ =
include $(MXNET_PLUGINS)

# Linker options that bracket an archive to be pulled in completely.
# Darwin uses -all_load/-noall_load; Windows gets neither variable.
ifeq ($(UNAME_S), Darwin)
  WHOLE_ARCH= -all_load
  NO_WHOLE_ARCH= -noall_load
else ifneq ($(UNAME_S), Windows)
  WHOLE_ARCH= --whole-archive
  NO_WHOLE_ARCH= --no-whole-archive
endif

# all dep
LIB_DEP += $(DMLC_CORE)/libdmlc.a $(NNVM_PATH)/lib/libnnvm.a
ALL_DEP = $(OBJ) $(EXTRA_OBJ) $(PLUGIN_OBJ) $(LIB_DEP)
# CUDA-specific objects and link flags. MXNET_USE_NCCL is always defined: 1 only
# when both USE_CUDA and USE_NCCL are 1, otherwise 0.
ifeq ($(USE_CUDA), 1)
  # Major CUDA release number, parsed from `nvcc --version`.
  CUDA_VERSION_MAJOR := $(shell $(NVCC) --version | grep "release" | awk '{print $$6}' | cut -c2- | cut -d '.' -f1)
  # The bundled CUB headers are only added for CUDA releases before 11.
  ifeq ($(shell test $(CUDA_VERSION_MAJOR) -lt 11; echo $$?), 0)
    CFLAGS += -I$(ROOTDIR)/3rdparty/nvidia_cub -DCUB_IGNORE_DEPRECATED_CPP_DIALECT
  endif
  ALL_DEP += $(CUOBJ) $(EXTRA_CUOBJ) $(PLUGIN_CUOBJ)
  LDFLAGS += -lcufft
  ifeq ($(ENABLE_CUDA_RTC), 1)
    LDFLAGS += -lcuda -lnvrtc
    CFLAGS += -DMXNET_ENABLE_CUDA_RTC=1
  endif
  # Make sure to add stubs as fallback in order to be able to build
  # without full CUDA install (especially if run without nvidia-docker)
  LDFLAGS += -L/usr/local/cuda/lib64/stubs
  ifeq ($(USE_NCCL), 1)
    ifneq ($(USE_NCCL_PATH), NONE)
      CFLAGS += -I$(USE_NCCL_PATH)/include
      LDFLAGS += -L$(USE_NCCL_PATH)/lib
    endif
    LDFLAGS += -lnccl
    CFLAGS += -DMXNET_USE_NCCL=1
  else
    CFLAGS += -DMXNET_USE_NCCL=0
  endif
else
  CFLAGS += -DMXNET_USE_NCCL=0
endif
# libjpeg-turbo: MXNET_USE_LIBJPEG_TURBO is always defined (0 or 1); a custom
# install prefix is honoured when USE_LIBJPEG_TURBO_PATH is not NONE.
ifneq ($(USE_LIBJPEG_TURBO), 1)
  CFLAGS += -DMXNET_USE_LIBJPEG_TURBO=0
else
  ifneq ($(USE_LIBJPEG_TURBO_PATH), NONE)
    CFLAGS += -I$(USE_LIBJPEG_TURBO_PATH)/include
    LDFLAGS += -L$(USE_LIBJPEG_TURBO_PATH)/lib
  endif
  LDFLAGS += -lturbojpeg
  CFLAGS += -DMXNET_USE_LIBJPEG_TURBO=1
endif

# On CI, MAVEN_ARGS is set to -B.
ifeq ($(CI), 1)
  MAVEN_ARGS := -B
endif

# For quick compile test, used smaller subset
ALLX_DEP= $(ALL_DEP)
# Compilation pattern rules. The rules under build/src and build/plugin/%.o
# list `mkldnn` as an order-only prerequisite, so it is brought up to date
# first without its timestamp forcing recompilation. C++ rules write a .d
# dependency file next to the object through -MMD.

ifeq ($(USE_INTGEMM), 1)
# Enforce a dependency on $(INTGEMM_PATH)/intgemm/intgemm_config.h which is a
# generated header based on compiler support.
build/src/operator/contrib/intgemm/%.o: src/operator/contrib/intgemm/%.cc $(INTGEMM_PATH)/intgemm/intgemm_config.h | mkldnn
	@mkdir -p $(@D)
	$(CXX) -std=c++11 -c $(CFLAGS) -MMD -I$(INTGEMM_PATH) -Isrc/operator -c $< -o $@
endif

# C++ sources of the library.
build/src/%.o: src/%.cc | mkldnn
	@mkdir -p $(@D)
	$(CXX) -std=c++11 -c $(CFLAGS) -MMD -c $< -o $@

# CUDA sources of the library: first emit the dependency file, then compile.
build/src/%_gpu.o: src/%.cu | mkldnn
	@mkdir -p $(@D)
	$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" --generate-dependencies -MT $@ $< >$(@:.o=.d)
	$(NVCC) -c -o $@ $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" $<

# A nvcc bug cause it to generate "generic/xxx.h" dependencies from torch headers.
# Use CXX to generate dependency instead.
build/plugin/%_gpu.o: plugin/%.cu
	@mkdir -p $(@D)
	$(CXX) -std=c++11 $(CFLAGS) -MM -MT $@ $< >$(@:.o=.d)
	$(NVCC) -c -o $@ $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" $<

# C++ sources of plugins.
build/plugin/%.o: plugin/%.cc | mkldnn
	@mkdir -p $(@D)
	$(CXX) -std=c++11 -c $(CFLAGS) -MMD -c $< -o $@

# Generic in-place rules; these add src/operator to the include path.
%_gpu.o: %.cu
	@mkdir -p $(@D)
	$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS) -Isrc/operator" --generate-dependencies -MT $@ $< >$(@:.o=.d)
	$(NVCC) -c -o $@ $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS) -Isrc/operator" $<

%.o: %.cc $(CORE_INC)
	@mkdir -p $(@D)
	$(CXX) -std=c++11 -c $(CFLAGS) -MMD -Isrc/operator -c $< -o $@
# Set install path for libmxnet.so on Mac OS
ifeq ($(UNAME_S), Darwin)
  LDFLAGS += -Wl,-install_name,@rpath/libmxnet.so
endif

# Static library. Only objects newer than the archive ($?) are inserted.
# NOTE: to statically link libmxnet.a we need the option
# -Wl,--whole-archive -lmxnet -Wl,--no-whole-archive
lib/libmxnet.a: $(ALLX_DEP)
	@mkdir -p $(@D)
	ar crv $@ $(filter %.o, $?)

# Shared library. Every object and archive is linked normally except
# libnnvm.a, which is wrapped in the whole-archive options.
lib/libmxnet.so: $(ALLX_DEP)
	@mkdir -p $(@D)
	$(CXX) $(CFLAGS) -shared -o $@ $(filter-out %libnnvm.a, $(filter %.o %.a, $^)) $(LDFLAGS) \
	-Wl,$(WHOLE_ARCH) $(filter %libnnvm.a, $^) -Wl,$(NO_WHOLE_ARCH)
# ps-lite and dmlc-core are built by their own makefiles. PSLITE and DMLCCORE
# are not files, so the sub-makes run on every invocation and decide for
# themselves whether anything needs rebuilding.
$(PS_PATH)/build/libps.a: PSLITE
PSLITE:
	$(MAKE) CXX="$(CXX)" DEPS_PATH="$(DEPS_PATH)" -C $(PS_PATH) ps

$(DMLC_CORE)/libdmlc.a: DMLCCORE
# -C replaces the former "cd dir; make; cd back" chain: that chain returned the
# status of the final cd, which hid a failed sub-make, and ran make in the
# current directory when the first cd failed.
DMLCCORE:
	+$(MAKE) -C $(DMLC_CORE) libdmlc.a USE_SSE=$(USE_SSE) config=$(ROOTDIR)/$(config)
# TVM runtime: make sure LLVM is available (running prepare_tvm.sh when
# llvm-config is missing), configure and build TVM in its build directory, then
# copy the runtime library into lib/.
# Each step is its own recipe line (or joined with &&) so that a failing step
# stops the build; a single ';' chain ending in `cd $(ROOTDIR)` would always
# report success.
lib/libtvm_runtime.so:
	echo "Compile TVM"
	@mkdir -p $(@D)
	[ -e $(LLVM_PATH)/bin/llvm-config ] || sh $(ROOTDIR)/contrib/tvmop/prepare_tvm.sh
	cd $(TVM_PATH)/build && \
	cmake -DUSE_LLVM="$(LLVM_PATH)/bin/llvm-config" \
	-DUSE_SORT=OFF -DUSE_CUDA=$(TVM_USE_CUDA) -DUSE_CUDNN=OFF -DUSE_OPENMP=ON ..
	+$(MAKE) -C $(TVM_PATH)/build VERBOSE=1
	mkdir -p $(ROOTDIR)/lib
	cp $(TVM_PATH)/build/libtvm_runtime.so $(ROOTDIR)/lib/libtvm_runtime.so
	ls $(ROOTDIR)/lib

# Options handed to the TVM operator compile script; the CUDA arch string is
# forwarded only when it is non-empty.
TVM_OP_COMPILE_OPTIONS = -o $(ROOTDIR)/lib --config $(ROOTDIR)/lib/tvmop.conf
ifneq ($(CUDA_ARCH),)
  TVM_OP_COMPILE_OPTIONS += --cuda-arch "$(CUDA_ARCH)"
endif

# TVM operator library; rebuilt when the runtime or any tvmop Python source changes.
lib/libtvmop.so: lib/libtvm_runtime.so $(wildcard contrib/tvmop/*/*.py contrib/tvmop/*.py)
	echo "Compile TVM operators"
	@mkdir -p $(@D)
	PYTHONPATH=$(TVM_PATH)/python:$(TVM_PATH)/topi/python:$(ROOTDIR)/contrib \
	LD_LIBRARY_PATH=$(ROOTDIR)/lib \
	python3 $(ROOTDIR)/contrib/tvmop/compile.py $(TVM_OP_COMPILE_OPTIONS)
# NNVM static library, built by NNVM's own makefile whenever one of its headers
# or sources is newer than the archive.
NNVM_INC = $(wildcard $(NNVM_PATH)/include/*/*.h)
NNVM_SRC = $(wildcard $(NNVM_PATH)/src/*/*/*.cc $(NNVM_PATH)/src/*/*.cc $(NNVM_PATH)/src/*.cc)
# -C is used instead of "cd dir; make; cd back" so that the recipe's exit status
# is the sub-make's, not that of a trailing cd.
$(NNVM_PATH)/lib/libnnvm.a: $(NNVM_INC) $(NNVM_SRC)
	+$(MAKE) -C $(NNVM_PATH) lib/libnnvm.a DMLC_CORE_PATH=$(DMLC_CORE)
# Command-line tools. Each binary names its inputs in a prerequisite-only rule;
# the shared recipe below links whatever sources, objects and archives appear
# among those prerequisites.
bin/im2rec: tools/im2rec.cc $(ALLX_DEP)

$(BIN) :
	@mkdir -p $(@D)
	$(CXX) $(CFLAGS) -std=c++11 -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS)
# CPP Package
ifeq ($(USE_CPP_PACKAGE), 1)
  include cpp-package/cpp-package.mk
  CFLAGS += -DMXNET_USE_CPP_PACKAGE=1
endif

# Further makefile fragments: MKLDNN build and C++ unit tests.
include mkldnn.mk
include tests/cpp/unittest.mk

# Aggregate targets.
extra-packages: $(EXTRA_PACKAGES)

test: $(TEST)

lint: cpplint rcpplint jnilint pylint
# C++ lint through dmlc-core's lint script; two paths are excluded.
cpplint:
	3rdparty/dmlc-core/scripts/lint.py mxnet cpp include src plugin cpp-package tests \
	--exclude_path src/operator/contrib/ctc_include include/mkldnn

# Python lint of python/mxnet with the CI pylintrc; shared-library files are ignored.
pylint:
	python3 -m pylint --rcfile=$(ROOTDIR)/ci/other/pylintrc --ignore-patterns=".*\.so$$,.*\.dll$$,.*\.dylib$$" python/mxnet
# MXNet extension dynamically loading libraries.
# Each library is compiled from one example source together with
# src/lib_api.cc. The sources are listed as prerequisites so that a library is
# rebuilt when they change, and every target is defined exactly once.
EXT_LIBS = build/libcustomop_lib.so build/libtransposecsr_lib.so build/libtransposerowsp_lib.so build/libsubgraph_lib.so build/libpass_lib.so
ifeq ($(USE_CUDA), 1)
  # The GPU example is only built when CUDA is enabled.
  EXT_LIBS += build/libcustomop_gpu_lib.so
endif

extension_libs: $(EXT_LIBS)

build/libcustomop_lib.so: example/extensions/lib_custom_op/gemm_lib.cc src/lib_api.cc
	@mkdir -p $(@D)
	$(CXX) -shared -fPIC -std=c++11 $^ -o $@ -I include

build/libcustomop_gpu_lib.so: example/extensions/lib_custom_op/relu_lib.cu src/lib_api.cc
	@mkdir -p $(@D)
	$(NVCC) -shared -std=c++11 -Xcompiler -fPIC $^ -o $@ -I include

build/libsubgraph_lib.so: example/extensions/lib_subgraph/subgraph_lib.cc src/lib_api.cc
	@mkdir -p $(@D)
	$(CXX) -shared -fPIC -std=c++11 $^ -o $@ -I include

build/libtransposecsr_lib.so: example/extensions/lib_custom_op/transposecsr_lib.cc src/lib_api.cc
	@mkdir -p $(@D)
	$(CXX) -shared -fPIC -std=c++11 $^ -o $@ -I include

build/libtransposerowsp_lib.so: example/extensions/lib_custom_op/transposerowsp_lib.cc src/lib_api.cc
	@mkdir -p $(@D)
	$(CXX) -shared -fPIC -std=c++11 $^ -o $@ -I include

build/libpass_lib.so: example/extensions/lib_pass/pass_lib.cc src/lib_api.cc
	@mkdir -p $(@D)
	$(CXX) -shared -fPIC -std=c++11 $^ -o $@ -I include
# Cython build: in-place extension build with $(PYTHON) or with python3.
cython:
	cd python; $(PYTHON) setup.py build_ext --inplace --with-cython

cython3:
	cd python; python3 setup.py build_ext --inplace --with-cython

# Remove generated Cython artefacts.
cyclean:
	rm -rf python/mxnet/*/*.so python/mxnet/*/*.cpp

# Scala package targets; each runs Maven inside scala-package.
scalaclean:
	cd $(ROOTDIR)/scala-package && mvn clean

scalapkg:
	cd $(ROOTDIR)/scala-package && mvn install -DskipTests

scalainstall:
	cd $(ROOTDIR)/scala-package && mvn install

scalaunittest:
	cd $(ROOTDIR)/scala-package && mvn install

scalaintegrationtest:
	cd $(ROOTDIR)/scala-package && mvn integration-test -DskipTests=false

# Lint of the JNI C++ sources; one header is excluded.
jnilint:
	3rdparty/dmlc-core/scripts/lint.py mxnet-jnicpp cpp scala-package/native/src \
	--exclude_path scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h

# Remove files produced for the R package.
rclean:
	$(RM) -r R-package/src/image_recordio.h R-package/NAMESPACE R-package/man \
	R-package/R/mxnet_generated.R R-package/inst R-package/src/*.o \
	R-package/src/*.so mxnet_*.tar.gz
# Download and unpack Apache RAT 0.13. Steps are chained with && so that a
# failed cd or download stops the recipe.
build/rat/apache-rat-0.13/apache-rat-0.13.jar:
	mkdir -p build/rat
	cd build/rat && \
	wget http://mirror.metrocast.net/apache//creadur/apache-rat-0.13/apache-rat-0.13-bin.zip && \
	unzip apache-rat-0.13-bin.zip

# License header check. The RAT report is echoed to stdout (through fd 5) and
# also captured; the target fails when the report contains the "unknown
# license" banner.
# The recipe uses process substitution and [[ ... =~ ... ]], so it runs under bash.
# Shell expansions are written with $$ so that make hands them to the shell
# instead of expanding them itself; $< is the jar prerequisite.
ratcheck: SHELL := bash
ratcheck: build/rat/apache-rat-0.13/apache-rat-0.13.jar
	exec 5>&1; \
	OUTPUT=$$(java -jar $< -E tests/nightly/apache_rat_license_check/rat-excludes -d .|tee >(cat - >&5)); \
	ERROR_MESSAGE="Printing headers for text files without a valid license header"; \
	echo "-------Process The Output-------"; \
	if [[ $$OUTPUT =~ $$ERROR_MESSAGE ]]; then \
		echo "ERROR: RAT Check detected files with unknown licenses. Please fix and run test again!"; \
		exit 1; \
	else \
		echo "SUCCESS: There are no files with an Unknown License."; \
	fi
# Remove build products here and in the bundled sub-projects.
# Sub-project cleans use `$(MAKE) -C dir`: with the former "cd dir; make clean"
# form a failing cd made the sub-make run in this directory and re-enter this
# very target. The leading '-' keeps them best-effort, as before: a sub-project
# that cannot be cleaned does not abort the overall clean.
ifneq ($(EXTRA_OPERATORS),)
clean: rclean cyclean $(EXTRA_PACKAGES_CLEAN)
	$(RM) -r build lib bin deps *~ */*~ */*/*~ */*/*/*~
	(cd scala-package && mvn clean) || true
	-$(MAKE) -C $(DMLC_CORE) clean
	-$(MAKE) -C $(PS_PATH) clean
	-$(MAKE) -C $(NNVM_PATH) clean
	-$(MAKE) -C $(TVM_PATH) clean
	-$(MAKE) -C $(AMALGAMATION_PATH) clean
	# Extra operators are compiled next to their sources; remove those artefacts too.
	$(RM) -r $(patsubst %, %/*.d, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.d, $(EXTRA_OPERATORS))
	$(RM) -r $(patsubst %, %/*.o, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.o, $(EXTRA_OPERATORS))
else
clean: rclean mkldnn_clean cyclean testclean $(EXTRA_PACKAGES_CLEAN)
	$(RM) -r build lib bin *~ */*~ */*/*~ */*/*/*~
	(cd scala-package && mvn clean) || true
	-$(MAKE) -C $(DMLC_CORE) clean
	-$(MAKE) -C $(PS_PATH) clean
	-$(MAKE) -C $(NNVM_PATH) clean
	-$(MAKE) -C $(TVM_PATH) clean
	-$(MAKE) -C $(AMALGAMATION_PATH) clean
endif
# clean_all currently does nothing beyond clean.
clean_all: clean

# Pull in compiler-generated dependency files, up to four directory levels
# below build/. The leading '-' silences the error when none exist yet.
-include build/*.d build/*/*.d build/*/*/*.d build/*/*/*/*.d

# Dependency files of extra operators live next to their sources.
ifneq ($(EXTRA_OPERATORS),)
  -include $(patsubst %, %/*.d, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.d, $(EXTRA_OPERATORS))
endif