| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
# Repository root and the directory that holds vendored third-party sources.
ROOTDIR = $(CURDIR)
TPARTYDIR = $(ROOTDIR)/3rdparty

# Host detection. On Windows the OS name is hard-coded and UNAME_P is left
# undefined; everywhere else both values come from `uname`.
ifneq ($(OS),Windows_NT)
  UNAME_S := $(shell uname -s)
  UNAME_P := $(shell uname -p)
else
  UNAME_S := Windows
endif

# Pick the build configuration file unless `config` already has a value.
# Precedence: $(CXXNET_CONFIG), then ./config.mk if it exists, then
# make/config.mk.
ifndef config
  ifdef CXXNET_CONFIG
    config = $(CXXNET_CONFIG)
  else
    ifeq ("$(wildcard ./config.mk)","")
      config = make/config.mk
    else
      config = config.mk
    endif
  endif
endif
| |
# Default locations of bundled dependencies. Each default is applied only when
# the variable does not already hold a non-empty value.
ifndef DMLC_CORE
  DMLC_CORE = $(TPARTYDIR)/dmlc-core
endif
# dmlc-core public headers; used as prerequisites of the generic %.o rule.
CORE_INC = $(wildcard $(DMLC_CORE)/include/*/*.h)

ifndef TVM_PATH
  TVM_PATH = $(TPARTYDIR)/tvm
endif

ifndef NNVM_PATH
  NNVM_PATH = $(TPARTYDIR)/tvm/nnvm
endif

ifndef DLPACK_PATH
  DLPACK_PATH = $(ROOTDIR)/3rdparty/dlpack
endif

ifndef AMALGAMATION_PATH
  AMALGAMATION_PATH = $(ROOTDIR)/amalgamation
endif

# LLVM is looked up inside the TVM build tree by default.
ifndef LLVM_PATH
  LLVM_PATH = $(TVM_PATH)/build/llvm
endif
| |
# use customized config file
include $(config)

# Export NO_OPENMP to the included makefiles and to sub-makes unless OpenMP is
# explicitly enabled. This test must come after the config file is read:
# evaluated earlier, a USE_OPENMP=1 set in the config file would not be visible
# yet and NO_OPENMP would be exported regardless.
ifneq ($(USE_OPENMP), 1)
  export NO_OPENMP = 1
endif
| |
# Enable MKL-DNN by default on x86_64 hosts that are neither macOS nor Windows,
# unless the user already chose a value.
ifndef USE_MKLDNN
  ifeq ($(filter Darwin Windows,$(UNAME_S)),)
    ifeq ($(UNAME_P), x86_64)
      USE_MKLDNN=1
    endif
  endif
endif

# USE_MKL2017 is a deprecated spelling that now simply turns MKL-DNN on.
ifeq ($(USE_MKL2017), 1)
  $(warning "USE_MKL2017 is deprecated. We will switch to USE_MKLDNN.")
  USE_MKLDNN=1
endif

# Install prefix of the bundled MKL-DNN build.
ifeq ($(USE_MKLDNN), 1)
  MKLDNNROOT = $(ROOTDIR)/3rdparty/mkldnn/build/install
endif
| |
# Decide whether to build intgemm when the user did not say. Only x86_64 hosts
# are considered.
ifndef USE_INTGEMM
  ifeq ($(UNAME_P), x86_64)
    # First word of the compiler banner and the major version number.
    COMPILER := $(shell $(CXX) --version |head -n 1 |cut -d " " -f 1)
    COMPILER_VERSION := $(shell $(CXX) -dumpversion |cut -d . -f 1)
    # clang and Apple clang are accepted regardless of version.
    ifneq ($(filter clang Apple,$(COMPILER)),)
      USE_INTGEMM=1
    endif
    # If it's not clang and not Apple clang, it's probably gcc and we need at least 5.
    # gcc --version gives the name of the program it was called with, which makes it hard to detect.
    COMPILER_VERSION_GE_5 := $(shell expr $(COMPILER_VERSION) \>= 5)
    ifeq ($(COMPILER_VERSION_GE_5), 1)
      USE_INTGEMM=1
    endif
  endif
endif
| |
# Pull in the mshadow and dmlc-core build fragments.
include $(TPARTYDIR)/mshadow/make/mshadow.mk
include $(DMLC_CORE)/make/dmlc.mk

# Warnings that are always enabled.
WARNFLAGS= -Wall -Wsign-compare
CFLAGS = -DMSHADOW_FORCE_STREAM $(WARNFLAGS)
# Use the old thread-local implementation in dmlc-core.
CFLAGS += -DDMLC_MODERN_THREAD_LOCAL=0
# Disable stack traces in exceptions by default.
CFLAGS += -DDMLC_LOG_STACK_TRACE_SIZE=0
CFLAGS += -DDMLC_LOG_FATAL_THROW=1
| |
# Developer mode: debug info plus warnings-as-errors for host and device code.
ifeq ($(DEV), 1)
  CFLAGS += -g -Werror
  # Excluded from Werror:
  # 1) variables used in '#pragma omp parallel' are considered unused
  CFLAGS += -Wno-error=unused-variable -Wno-error=maybe-uninitialized -Wno-error=unused-function
  NVCCFLAGS += -Werror cross-execution-space-call
endif
| |
# Optimisation level: unoptimised with libstdc++ assertions for DEBUG=1,
# otherwise -O3 with NDEBUG defined.
ifeq ($(DEBUG), 1)
  CFLAGS += -g -O0 -D_GLIBCXX_ASSERTIONS
else
  CFLAGS += -O3 -DNDEBUG=1
endif
# Header search paths. The dmlc-core and TVM headers are taken from
# $(DMLC_CORE) and $(TVM_PATH) so that they match the trees the libraries are
# built from when those paths are overridden; with the default paths the
# resulting flags are unchanged.
CFLAGS += -I$(TPARTYDIR)/mshadow/ -I$(DMLC_CORE)/include -fPIC -I$(NNVM_PATH)/include -I$(DLPACK_PATH)/include -I$(TVM_PATH)/include -Iinclude $(MSHADOW_CFLAGS)
LDFLAGS = -pthread -ldl $(MSHADOW_LDFLAGS) $(DMLC_LDFLAGS)
| |
# Note: enabling test coverage can make the linker fail because of the large
# amount of injected code.
# More information: https://github.com/apache/incubator-mxnet/issues/15971
ifeq ($(ENABLE_TESTCOVERAGE), 1)
  CFLAGS += --coverage
  LDFLAGS += --coverage
endif

# NVTX annotations.
ifeq ($(USE_NVTX), 1)
  CFLAGS += -DMXNET_USE_NVTX=1
  LDFLAGS += -lnvToolsExt
endif

# TensorRT integration (links protobuf, ONNX and the TensorRT libraries).
ifeq ($(USE_TENSORRT), 1)
  CFLAGS += -I$(ROOTDIR) -I$(TPARTYDIR) -DONNX_NAMESPACE=$(ONNX_NAMESPACE) -DMXNET_USE_TENSORRT=1
  LDFLAGS += -lprotobuf -pthread -lonnx -lonnx_proto -lnvonnxparser -lnvonnxparser_runtime -lnvinfer -lnvinfer_plugin
endif
# -L/usr/local/lib
| |
# nvcc flags: identical for both build types apart from the debug/optimisation
# switches, which are selected first and spliced into the common flag list.
ifeq ($(DEBUG), 1)
  NVCC_OPT_FLAGS = -g -G -O0
else
  NVCC_OPT_FLAGS = -O3
endif
NVCCFLAGS += -std=c++11 -Xcompiler -D_FORCE_INLINES $(NVCC_OPT_FLAGS) -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
| |
# Segfault logger.
ifeq ($(USE_SIGNAL_HANDLER), 1)
  CFLAGS += -DMXNET_USE_SIGNAL_HANDLER=1
endif

# Caffe plugin: enabled whenever CAFFE_PATH is set.
ifdef CAFFE_PATH
  CFLAGS += -DMXNET_USE_CAFFE=1
endif

# Languages covered by lint, unless overridden.
ifndef LINT_LANG
  LINT_LANG = "all"
endif

# MKL-DNN: define the feature macro, add its headers, and link the static
# library produced under $(MKLDNNROOT).
ifeq ($(USE_MKLDNN), 1)
  CFLAGS += -DMXNET_USE_MKLDNN=1
  CFLAGS += -I$(ROOTDIR)/src/operator/nn/mkldnn/
  CFLAGS += -I$(MKLDNNROOT)/include
  LIB_DEP += $(MKLDNNROOT)/lib/libdnnl.a
endif

# Use MKL's layernorm implementation. Only has an impact if MKL is compiled in.
ifeq ($(USE_MKL_LAYERNORM), 1)
  CFLAGS += -DMXNET_USE_MKL_LAYERNORM=1
endif
| |
# OpenCV setup. With explicit USE_OPENCV_INC_PATH / USE_OPENCV_LIB_PATH the
# flags are assembled by hand; otherwise they are taken from pkg-config,
# preferring the `opencv4` package over `opencv`.
ifeq ($(USE_OPENCV), 1)
  CFLAGS += -DMXNET_USE_OPENCV=1
  ifeq ($(filter-out NONE, $(USE_OPENCV_INC_PATH)),)
    ifeq ("$(shell pkg-config --exists opencv4; echo $$?)", "0")
      OPENCV_LIB = opencv4
    else
      OPENCV_LIB = opencv
    endif
    CFLAGS += $(shell pkg-config --cflags $(OPENCV_LIB))
    LDFLAGS += $(shell pkg-config --libs-only-L $(OPENCV_LIB))
    # Only imgcodecs/highgui are taken from pkg-config's library list.
    LDFLAGS += $(filter -lopencv_imgcodecs -lopencv_highgui, $(shell pkg-config --libs-only-l $(OPENCV_LIB)))
  else
    CFLAGS += -I$(USE_OPENCV_INC_PATH)/include
    # An include path without a library path is a configuration error.
    ifeq ($(filter-out NONE, $(USE_OPENCV_LIB_PATH)),)
      $(error Please add the path of OpenCV shared library path into `USE_OPENCV_LIB_PATH`, when `USE_OPENCV_INC_PATH` is not NONE)
    endif
    LDFLAGS += -L$(USE_OPENCV_LIB_PATH)
    # Link the optional modules only when they are present in the library dir.
    ifneq ($(wildcard $(USE_OPENCV_LIB_PATH)/libopencv_imgcodecs.*),)
      LDFLAGS += -lopencv_imgcodecs
    endif
    ifneq ($(wildcard $(USE_OPENCV_LIB_PATH)/libopencv_highgui.*),)
      LDFLAGS += -lopencv_highgui
    endif
  endif
  LDFLAGS += -lopencv_imgproc -lopencv_core
  # im2rec is only built when OpenCV is enabled.
  BIN += bin/im2rec
else
  CFLAGS += -DMXNET_USE_OPENCV=0
endif
| |
# OpenMP support in MXNet's own sources.
ifeq ($(USE_OPENMP), 1)
  CFLAGS += -fopenmp -DMXNET_USE_OPENMP=1
endif

# NNPACK backend.
ifeq ($(USE_NNPACK), 1)
  CFLAGS += -DMXNET_USE_NNPACK=1
  LDFLAGS += -lnnpack
endif

# Operator tuning.
ifeq ($(USE_OPERATOR_TUNING), 1)
  CFLAGS += -DMXNET_USE_OPERATOR_TUNING=1
endif

# MSHADOW_INT64_TENSOR_SIZE is always defined, as 1 or 0.
ifeq ($(USE_INT64_TENSOR_SIZE), 1)
  CFLAGS += -DMSHADOW_INT64_TENSOR_SIZE=1
else
  CFLAGS += -DMSHADOW_INT64_TENSOR_SIZE=0
endif
# Verify that a separate LAPACK library exists when using blas/openblas/atlas/mkl
# and switch LAPACK support off when none can be found. Cases this covers:
# - Ubuntu 14.04 or lower: lapack is not automatically installed with openblas
# - Ubuntu: installing atlas does not install the atlas-provided lapack library
# - rhel7.2: installing the `lapack-static` package via yum dismisses this warning
# LAPACK is switched off silently rather than failing the build, for backward
# compatibility.
ifeq ($(USE_LAPACK), 1)
  ifeq ($(USE_BLAS),$(filter $(USE_BLAS),blas openblas atlas mkl))
    # Every location probed; the list is empty when none of them exists.
    LAPACK_CANDIDATES := $(wildcard \
        $(USE_LAPACK_PATH)/liblapack.a \
        $(USE_LAPACK_PATH)/liblapack.so \
        $(USE_LAPACK_PATH)/liblapack.dylib \
        /lib/liblapack.a \
        /lib/liblapack.so \
        /usr/lib/liblapack.a \
        /usr/lib/liblapack.so \
        /usr/lib/liblapack.dylib \
        /usr/lib64/liblapack.a \
        /usr/lib64/liblapack.so)
    ifeq (,$(LAPACK_CANDIDATES))
      USE_LAPACK = 0
      $(warning "USE_LAPACK disabled because libraries were not found")
    endif
  endif
endif
| |
# LAPACK link settings.
ifeq ($(USE_LAPACK), 1)
  # Optional extra library search directory.
  ifneq ($(USE_LAPACK_PATH), )
    LDFLAGS += -L$(USE_LAPACK_PATH)
  endif
  # A separate -llapack is only added for these BLAS providers.
  ifeq ($(USE_BLAS),$(filter $(USE_BLAS),blas openblas atlas mkl))
    LDFLAGS += -llapack
  endif
  CFLAGS += -DMXNET_USE_LAPACK
endif

# cuDNN.
ifeq ($(USE_CUDNN), 1)
  CFLAGS += -DMSHADOW_USE_CUDNN=1
  LDFLAGS += -lcudnn
endif

# Tell the sources which BLAS provider was selected.
ifeq ($(USE_BLAS), openblas)
  CFLAGS += -DMXNET_USE_BLAS_OPEN=1
else ifeq ($(USE_BLAS), atlas)
  CFLAGS += -DMXNET_USE_BLAS_ATLAS=1
else ifeq ($(USE_BLAS), mkl)
  CFLAGS += -DMXNET_USE_BLAS_MKL=1
else ifeq ($(USE_BLAS), apple)
  CFLAGS += -DMXNET_USE_BLAS_APPLE=1
endif
| |
# Whether to use the F16C instruction set extension for fast fp16 compute on CPU.
# When cross compiling you may want to turn it off explicitly if the target
# system does not support it.
ifndef USE_F16C
  ifneq ($(OS),Windows_NT)
    detected_OS := $(shell uname -s)
    # Ask the running CPU: sysctl on macOS, /proc/cpuinfo on Linux. On any
    # other system F16C_SUPP stays unset and F16C ends up disabled.
    ifeq ($(detected_OS),Darwin)
      F16C_SUPP = $(shell sysctl -a | grep machdep.cpu.features | grep F16C)
    endif
    ifeq ($(detected_OS),Linux)
      F16C_SUPP = $(shell cat /proc/cpuinfo | grep flags | grep f16c)
    endif
    ifeq ($(strip $(F16C_SUPP)),)
      USE_F16C=0
    else
      USE_F16C=1
    endif
  endif
  # On Windows nothing is detected automatically: check that your processor and
  # compiler support F16C. One way to check the processor is the coreinfo tool,
  # https://docs.microsoft.com/en-us/sysinternals/downloads/coreinfo.
  # If `coreinfo -c` shows F16C and the compiler supports it, set USE_F16C=1
  # explicitly to leverage that capability.
endif
| |
# Alternative malloc libraries. gperftools (tcmalloc) is considered first;
# jemalloc is considered only when gperftools was not requested at all. For
# either one, the first existing library file among the candidate directories
# is linked; when none exists the feature is switched off.

# gperftools malloc library (tcmalloc)
ifeq ($(USE_GPERFTOOLS), 1)
  FIND_LIBFILEEXT=so
  ifeq ($(USE_GPERFTOOLS_STATIC), 1)
    FIND_LIBFILEEXT=a
  endif
  FIND_LIBFILE=$(firstword $(wildcard \
      $(USE_GPERFTOOLS_PATH)/libtcmalloc.$(FIND_LIBFILEEXT) \
      /lib/libtcmalloc.$(FIND_LIBFILEEXT) \
      /usr/lib/libtcmalloc.$(FIND_LIBFILEEXT) \
      /usr/local/lib/libtcmalloc.$(FIND_LIBFILEEXT) \
      /usr/lib64/libtcmalloc.$(FIND_LIBFILEEXT)))
  ifeq (,$(FIND_LIBFILE))
    USE_GPERFTOOLS=0
  endif
  ifeq ($(USE_GPERFTOOLS), 1)
    CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
    LDFLAGS += $(FIND_LIBFILE)
  endif

# jemalloc malloc library (if not using gperftools)
else
  ifeq ($(USE_JEMALLOC), 1)
    FIND_LIBFILEEXT=so
    ifeq ($(USE_JEMALLOC_STATIC), 1)
      FIND_LIBFILEEXT=a
    endif
    FIND_LIBFILE=$(firstword $(wildcard \
        $(USE_JEMALLOC_PATH)/libjemalloc.$(FIND_LIBFILEEXT) \
        /lib/libjemalloc.$(FIND_LIBFILEEXT) \
        /usr/lib/libjemalloc.$(FIND_LIBFILEEXT) \
        /usr/local/lib/libjemalloc.$(FIND_LIBFILEEXT) \
        /usr/lib/x86_64-linux-gnu/libjemalloc.$(FIND_LIBFILEEXT) \
        /usr/lib64/libjemalloc.$(FIND_LIBFILEEXT)))
    ifeq (,$(FIND_LIBFILE))
      USE_JEMALLOC=0
    endif
    ifeq ($(USE_JEMALLOC), 1)
      CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc \
                -fno-builtin-free -DUSE_JEMALLOC
      LDFLAGS += $(FIND_LIBFILE)
    endif
  endif
endif
| |
# Neither tcmalloc nor jemalloc is in use: suggest installing one of them.
ifeq ($(filter 1,$(USE_GPERFTOOLS) $(USE_JEMALLOC)),)
  $(warning WARNING: Significant performance increases can be achieved by installing and \
    enabling gperftools or jemalloc development packages)
endif

# Threaded execution engine.
ifeq ($(USE_THREADED_ENGINE), 1)
  CFLAGS += -DMXNET_USE_THREADED_ENGINE
endif

# User-supplied extra flags; the literal value NONE means "nothing to add".
ifneq ($(ADD_CFLAGS), NONE)
  CFLAGS += $(ADD_CFLAGS)
endif

ifneq ($(ADD_LDFLAGS), NONE)
  LDFLAGS += $(ADD_LDFLAGS)
endif
| |
ifeq ($(NVCC), NONE)
  # If NVCC has not been manually defined, use the CUDA_PATH bin dir.
  ifneq ($(USE_CUDA_PATH), NONE)
    NVCC=$(USE_CUDA_PATH)/bin/nvcc
  endif
endif

# Guard against displaying nvcc info messages to users not using CUDA.
ifeq ($(USE_CUDA), 1)
  # Compare the installed ar version with the minimum known-good one by
  # version-sorting the pair and taking both ends. `grep -E` replaces the
  # obsolescent `egrep`.
  AR_VERSION := $(shell ar --version | grep -E -o "([0-9]{1,}\.)+[0-9]{1,}")
  EXPECTED_AR_VERSION := 2.27
  LARGE_VERSION := $(shell printf '%s\n' "$(AR_VERSION)" "$(EXPECTED_AR_VERSION)" | sort -V | tail -n 1)
  SMALL_VERSION := $(shell printf '%s\n' "$(AR_VERSION)" "$(EXPECTED_AR_VERSION)" | sort -V | head -n 1)

# $n expands to a newline so the warning below can span several lines.
define n


endef

  # Warn whenever CUDA is enabled and ar is older than expected, i.e. the
  # expected version sorts last and differs from the other one. The check is
  # independent of where nvcc was found, and it is skipped when no version
  # could be parsed so an empty value is not mistaken for an old ar.
  ifneq ($(strip $(AR_VERSION)),)
    ifeq ($(LARGE_VERSION), $(EXPECTED_AR_VERSION))
      ifneq ($(LARGE_VERSION), $(SMALL_VERSION))
        $(warning WARNING: Archive utility: ar version being used is less than 2.27.0. $n \
          Note that with USE_CUDA=1 flag and USE_CUDNN=1 this is known to cause problems. $n \
          For more info see: https://github.com/apache/incubator-mxnet/issues/15084)
        # Pause so the warning is noticed before build output scrolls past.
        $(shell sleep 5)
      endif
    endif
  endif

  # If NVCC is not at the location specified, use CUDA_PATH instead.
  ifeq ("$(wildcard $(NVCC))","")
    ifneq ($(USE_CUDA_PATH), NONE)
      NVCC=$(USE_CUDA_PATH)/bin/nvcc
      $(info INFO: nvcc was not found on your path)
      $(info INFO: Using $(NVCC) as nvcc path)
    else
      $(warning WARNING: could not find nvcc compiler, the specified path was: $(NVCC))
    endif
  endif
endif
| |
# Computes 'CUDA_ARCH', the set of GPU architectures the kernels are compiled
# for. Trim KNOWN_CUDA_ARCHS below to drop architectures you do not need (this
# speeds up compilation), or pre-set CUDA_ARCH in config.mk to a non-empty
# value to take full control.
#
# For the architectures in the list, nvcc builds a fat binary holding the
# binaries (SASS) for every architecture the installed CUDA toolkit supports,
# plus the assembly (PTX) for the most recent of them. When the kernels later
# run on a newer-architecture GPU, the updated driver JIT-compiles them from
# the included PTX.
ifeq ($(USE_CUDA), 1)
  ifeq ($(CUDA_ARCH),)
    KNOWN_CUDA_ARCHS := 30 35 50 52 60 61 70 75 80
    # Probe each architecture by running nvcc on an empty input; keep a
    # -gencode argument (SASS) for every level nvcc accepts.
    CUDA_ARCH := $(foreach arch,$(KNOWN_CUDA_ARCHS), \
        $(shell $(NVCC) -arch=sm_$(arch) -E --x cu /dev/null >/dev/null 2>&1 && \
            echo -gencode arch=compute_$(arch),code=sm_$(arch)))
    # Rewrite the trailing "code=sm_NN" as "code=[sm_NN,compute_NN]" so the PTX
    # of the newest architecture is also embedded, for forward compatibility
    # with newer GPUs.
    CUDA_ARCH := $(shell echo $(CUDA_ARCH) | sed 's/sm_\([0-9]*\)$$/[sm_\1,compute_\1]/')
    # Append fat binary compression when nvcc supports it.
    COMPRESS := --fatbin-options -compress-all
    CUDA_ARCH += $(shell $(NVCC) -cuda $(COMPRESS) --x cu /dev/null -o /dev/null >/dev/null 2>&1 && \
        echo $(COMPRESS))
  endif
  $(info Running CUDA_ARCH: $(CUDA_ARCH))
endif
| |
# ps-lite. Its make fragment is always included; the flags and the static
# library are added only for distributed kvstore builds.
PS_PATH=$(ROOTDIR)/3rdparty/ps-lite
DEPS_PATH=$(shell pwd)/deps
include $(PS_PATH)/make/ps.mk
ifeq ($(USE_DIST_KVSTORE), 1)
  CFLAGS += -DMXNET_USE_DIST_KVSTORE
  CFLAGS += -I$(PS_PATH)/include -I$(DEPS_PATH)/include
  LIB_DEP += $(PS_PATH)/build/libps.a
  LDFLAGS += $(PS_LDFLAGS_A)
endif
| |
# Targets that name commands rather than files. Every command-style target
# defined in this makefile is listed so that a stray file or directory with the
# same name can never make one of them look up to date.
.PHONY: clean all extra-packages test lint clean_all rcpplint rcppexport roxygen \
	cython3 cython cyclean \
	cpplint pylint jnilint rclean ratcheck extension_libs \
	scalaclean scalapkg scalainstall scalaunittest scalaintegrationtest \
	PSLITE DMLCCORE

# Default goal: both library flavours, the optional binaries, extra packages
# and the example extension libraries.
all: lib/libmxnet.a lib/libmxnet.so $(BIN) extra-packages extension_libs

# C++ sources, collected up to four directory levels below src/.
SRC = $(wildcard src/*/*/*/*.cc src/*/*/*.cc src/*/*.cc src/*.cc)
| |
# intgemm: fetched and built on demand when enabled; otherwise the operators
# that need it are dropped from the source list.
ifeq ($(USE_INTGEMM), 1)
  ifndef INTGEMM_PATH
    INTGEMM_PATH = build/3rdparty/intgemm
  endif
  CFLAGS += -DMXNET_USE_INTGEMM=1
  LIB_DEP += $(INTGEMM_PATH)/libintgemm.a

# Download intgemm if it isn't already. The mkdir/rm pair leaves the parent
# directories in place and the checkout directory itself absent, ready for the
# clone; the checkout is then pinned to a fixed commit.
$(INTGEMM_PATH):
	@mkdir -p $(INTGEMM_PATH)
	rm -rf $(INTGEMM_PATH)
	git clone https://github.com/kpu/intgemm $(INTGEMM_PATH)
	cd $(INTGEMM_PATH) && git checkout -q 4172dcc209e6793dd920dec9cf9c9fc81605bd9d

# These files come with the checkout; the empty recipe only records that they
# depend on it.
$(INTGEMM_PATH)/compile_test_avx512bw.cc $(INTGEMM_PATH)/compile_test_avx512vnni.cc $(INTGEMM_PATH)/intgemm/intgemm.cc: $(INTGEMM_PATH)
	@

# Compiler tests for AVX512BW and AVX512VNNI. Each probe that compiles adds
# the matching feature macro to the generated config header.
$(INTGEMM_PATH)/intgemm/intgemm_config.h: $(INTGEMM_PATH)/compile_test_avx512bw.cc $(INTGEMM_PATH)/compile_test_avx512vnni.cc
	echo '#pragma once' >$@
	$(CXX) $(CFLAGS) $(INTGEMM_PATH)/compile_test_avx512bw.cc 2>/dev/null && echo \#define INTGEMM_COMPILER_SUPPORTS_AVX512BW >>$@ || echo Your compiler is missing AVX512BW support
	$(CXX) $(CFLAGS) $(INTGEMM_PATH)/compile_test_avx512vnni.cc 2>/dev/null && echo \#define INTGEMM_COMPILER_SUPPORTS_AVX512VNNI >>$@ || echo Your compiler is missing AVX512VNNI support

# The single intgemm translation unit, rebuilt when any intgemm header changes.
$(INTGEMM_PATH)/intgemm/intgemm.o: $(INTGEMM_PATH)/intgemm/intgemm_config.h $(INTGEMM_PATH)/intgemm/intgemm.cc $(wildcard $(INTGEMM_PATH)/intgemm/*.h $(INTGEMM_PATH)/intgemm/*/*.h)
	$(CXX) $(CFLAGS) -I$(INTGEMM_PATH) -std=c++11 -c $(INTGEMM_PATH)/intgemm/intgemm.cc -o $@

# Static archive linked into libmxnet.
$(INTGEMM_PATH)/libintgemm.a: $(INTGEMM_PATH)/intgemm/intgemm.o
	@mkdir -p $(@D)
	ar crv $@ $(filter %.o, $?)
else
  # If we're not using intgemm, remove the operators from src.
  INTGEMM_OPS := $(wildcard src/operator/contrib/intgemm/*.cc)
  SRC := $(filter-out $(INTGEMM_OPS),$(SRC))
endif
| |
# Object lists: host objects mirror src/ under build/, device objects get a
# _gpu suffix.
OBJ = $(SRC:%.cc=build/%.o)
CUSRC = $(wildcard src/*/*/*/*.cu src/*/*/*.cu src/*/*.cu src/*.cu)
CUOBJ = $(CUSRC:%.cu=build/%_gpu.o)

# TVM-generated operators: link the TVM runtime and locate it next to the
# library at run time via an $ORIGIN rpath.
ifeq ($(USE_TVM_OP), 1)
  LIB_DEP += lib/libtvm_runtime.so lib/libtvmop.so
  CFLAGS += -I$(TVM_PATH)/include -DMXNET_USE_TVM_OP=1
  LDFLAGS += -L$(ROOTDIR)/lib -ltvm_runtime -Wl,-rpath,'$${ORIGIN}'

  # Value handed to TVM's USE_CUDA cmake option: OFF, ON, or the CUDA path
  # when one is configured.
  TVM_USE_CUDA := OFF
  ifeq ($(USE_CUDA), 1)
    TVM_USE_CUDA := ON
    ifneq ($(USE_CUDA_PATH), NONE)
      TVM_USE_CUDA := $(USE_CUDA_PATH)
    endif
  endif
endif
| |
# Extra operators: sources are collected one and two levels below each
# directory listed in EXTRA_OPERATORS. The lists default to empty.
EXTRA_SRC =
EXTRA_OBJ =
EXTRA_CUSRC =
EXTRA_CUOBJ =
ifneq ($(EXTRA_OPERATORS),)
  EXTRA_SRC = $(wildcard $(addsuffix /*.cc,$(EXTRA_OPERATORS)) $(addsuffix /*/*.cc,$(EXTRA_OPERATORS)))
  EXTRA_OBJ = $(EXTRA_SRC:.cc=.o)
  EXTRA_CUSRC = $(wildcard $(addsuffix /*.cu,$(EXTRA_OPERATORS)) $(addsuffix /*/*.cu,$(EXTRA_OPERATORS)))
  EXTRA_CUOBJ = $(EXTRA_CUSRC:%.cu=%_gpu.o)
endif

# Plugins: the makefiles listed in MXNET_PLUGINS are included after the plugin
# object lists have been reset to empty.
PLUGIN_OBJ =
PLUGIN_CUOBJ =
include $(MXNET_PLUGINS)

# Linker switches for pulling in every member of a static archive. They differ
# between macOS and other systems and are left unset on Windows.
ifneq ($(UNAME_S), Windows)
  ifneq ($(UNAME_S), Darwin)
    WHOLE_ARCH= --whole-archive
    NO_WHOLE_ARCH= --no-whole-archive
  else
    WHOLE_ARCH= -all_load
    NO_WHOLE_ARCH= -noall_load
  endif
endif
| |
# Everything libmxnet is assembled from.
LIB_DEP += $(DMLC_CORE)/libdmlc.a $(NNVM_PATH)/lib/libnnvm.a
ALL_DEP = $(OBJ) $(EXTRA_OBJ) $(PLUGIN_OBJ) $(LIB_DEP)

ifeq ($(USE_CUDA), 1)
  # Major version of the CUDA toolkit as reported by nvcc. The bundled CUB
  # copy is put on the include path only for toolkits older than CUDA 11.
  CUDA_VERSION_MAJOR := $(shell $(NVCC) --version | grep "release" | awk '{print $$6}' | cut -c2- | cut -d '.' -f1)
  ifeq ($(shell test $(CUDA_VERSION_MAJOR) -lt 11; echo $$?), 0)
    CFLAGS += -I$(ROOTDIR)/3rdparty/nvidia_cub -DCUB_IGNORE_DEPRECATED_CPP_DIALECT
  endif

  ALL_DEP += $(CUOBJ) $(EXTRA_CUOBJ) $(PLUGIN_CUOBJ)
  LDFLAGS += -lcufft
  # Runtime compilation of CUDA kernels.
  ifeq ($(ENABLE_CUDA_RTC), 1)
    LDFLAGS += -lcuda -lnvrtc
    CFLAGS += -DMXNET_ENABLE_CUDA_RTC=1
  endif
  # Make sure to add stubs as fallback in order to be able to build
  # without full CUDA install (especially if run without nvidia-docker)
  LDFLAGS += -L/usr/local/cuda/lib64/stubs
  # NCCL; the macro is always defined, as 1 or 0.
  ifeq ($(USE_NCCL), 1)
    ifneq ($(USE_NCCL_PATH), NONE)
      CFLAGS += -I$(USE_NCCL_PATH)/include
      LDFLAGS += -L$(USE_NCCL_PATH)/lib
    endif
    LDFLAGS += -lnccl
    CFLAGS += -DMXNET_USE_NCCL=1
  else
    CFLAGS += -DMXNET_USE_NCCL=0
  endif
else
  # Without CUDA, NCCL is always off.
  CFLAGS += -DMXNET_USE_NCCL=0
endif
| |
# libjpeg-turbo; the macro is always defined, as 1 or 0.
ifeq ($(USE_LIBJPEG_TURBO), 1)
  # Optional custom install prefix.
  ifneq ($(USE_LIBJPEG_TURBO_PATH), NONE)
    CFLAGS += -I$(USE_LIBJPEG_TURBO_PATH)/include
    LDFLAGS += -L$(USE_LIBJPEG_TURBO_PATH)/lib
  endif
  LDFLAGS += -lturbojpeg
  CFLAGS += -DMXNET_USE_LIBJPEG_TURBO=1
else
  CFLAGS += -DMXNET_USE_LIBJPEG_TURBO=0
endif

# On CI, MAVEN_ARGS is set to -B.
ifeq ($(CI), 1)
  MAVEN_ARGS := -B
endif

# Dependency list used by the library and binary rules. Point it at a smaller
# subset for a quick compile test.
ALLX_DEP= $(ALL_DEP)
| |
# Compile rules. Host objects are built with -MMD and device objects write a
# .d file explicitly, so header dependencies are tracked through those files.
# `| mkldnn` is an order-only prerequisite: the mkldnn target is brought up to
# date first without forcing recompilation.

ifeq ($(USE_INTGEMM), 1)
# Enforce a dependency on $(INTGEMM_PATH)/intgemm/intgemm_config.h which is a generated header based on compiler support.
build/src/operator/contrib/intgemm/%.o: src/operator/contrib/intgemm/%.cc $(INTGEMM_PATH)/intgemm/intgemm_config.h | mkldnn
	@mkdir -p $(@D)
	$(CXX) -std=c++11 $(CFLAGS) -MMD -I$(INTGEMM_PATH) -Isrc/operator -c $< -o $@
endif

# Host sources under src/.
build/src/%.o: src/%.cc | mkldnn
	@mkdir -p $(@D)
	$(CXX) -std=c++11 $(CFLAGS) -MMD -c $< -o $@

# Device sources under src/: first emit the dependency file, then compile.
build/src/%_gpu.o: src/%.cu | mkldnn
	@mkdir -p $(@D)
	$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" --generate-dependencies -MT $@ $< >$(@:.o=.d)
	$(NVCC) -c -o $@ $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" $<

# A nvcc bug cause it to generate "generic/xxx.h" dependencies from torch headers.
# Use CXX to generate dependency instead.
build/plugin/%_gpu.o: plugin/%.cu
	@mkdir -p $(@D)
	$(CXX) -std=c++11 $(CFLAGS) -MM -MT $@ $< >$(@:.o=.d)
	$(NVCC) -c -o $@ $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" $<

# Host sources under plugin/.
build/plugin/%.o: plugin/%.cc | mkldnn
	@mkdir -p $(@D)
	$(CXX) -std=c++11 $(CFLAGS) -MMD -c $< -o $@

# Generic in-tree device rule; src/operator is added to the include path.
%_gpu.o: %.cu
	@mkdir -p $(@D)
	$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS) -Isrc/operator" --generate-dependencies -MT $@ $< >$(@:.o=.d)
	$(NVCC) -c -o $@ $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS) -Isrc/operator" $<

# Generic in-tree host rule; also depends on the dmlc-core headers.
%.o: %.cc $(CORE_INC)
	@mkdir -p $(@D)
	$(CXX) -std=c++11 $(CFLAGS) -MMD -Isrc/operator -c $< -o $@
| |
# On macOS, record an @rpath-relative install name in libmxnet.so.
ifeq ($(UNAME_S), Darwin)
  LDFLAGS += -Wl,-install_name,@rpath/libmxnet.so
endif

# NOTE: to statically link libmxnet.a we need the option
# --Wl,--whole-archive -lmxnet --Wl,--no-whole-archive
# Only the objects newer than the archive ($?) are added or replaced.
lib/libmxnet.a: $(ALLX_DEP)
	@mkdir -p $(@D)
	ar crv $@ $(filter %.o, $?)

# Shared library: all objects and archives are linked normally, except
# libnnvm.a, which is wrapped in the whole-archive switches so every member is
# pulled in.
lib/libmxnet.so: $(ALLX_DEP)
	@mkdir -p $(@D)
	$(CXX) $(CFLAGS) -shared -o $@ $(filter-out %libnnvm.a, $(filter %.o %.a, $^)) $(LDFLAGS) \
	-Wl,$(WHOLE_ARCH) $(filter %libnnvm.a, $^) -Wl,$(NO_WHOLE_ARCH)
| |
# ps-lite static library, delegated to ps-lite's own makefile.
$(PS_PATH)/build/libps.a: PSLITE

PSLITE:
	$(MAKE) CXX="$(CXX)" DEPS_PATH="$(DEPS_PATH)" -C $(PS_PATH) ps

# dmlc-core static library, delegated to dmlc-core's own makefile.
$(DMLC_CORE)/libdmlc.a: DMLCCORE

# `$(MAKE) -C` is used instead of `cd dir; $(MAKE) ...; cd back`. With the `;`
# chain the recipe's exit status was that of the final `cd`, so a failed
# dmlc-core build went unnoticed, and a failed first `cd` would have run
# $(MAKE) in the current directory instead.
DMLCCORE:
	+$(MAKE) -C $(DMLC_CORE) libdmlc.a USE_SSE=$(USE_SSE) config=$(ROOTDIR)/$(config)
| |
# Build the TVM runtime with cmake inside $(TVM_PATH)/build and copy the
# resulting shared library into lib/. prepare_tvm.sh runs first when
# llvm-config is not present under $(LLVM_PATH). Steps are separate recipe
# lines or chained with && so any failure (including a failed `cd`, which
# would otherwise run cmake from the repository root) stops the build instead
# of being masked by a later command's exit status.
lib/libtvm_runtime.so:
	echo "Compile TVM"
	@mkdir -p $(@D)
	[ -e $(LLVM_PATH)/bin/llvm-config ] || sh $(ROOTDIR)/contrib/tvmop/prepare_tvm.sh
	mkdir -p $(TVM_PATH)/build
	cd $(TVM_PATH)/build && \
	cmake -DUSE_LLVM="$(LLVM_PATH)/bin/llvm-config" \
	      -DUSE_SORT=OFF -DUSE_CUDA=$(TVM_USE_CUDA) -DUSE_CUDNN=OFF -DUSE_OPENMP=ON .. && \
	$(MAKE) VERBOSE=1
	mkdir -p $(ROOTDIR)/lib
	cp $(TVM_PATH)/build/libtvm_runtime.so $(ROOTDIR)/lib/libtvm_runtime.so
	ls $(ROOTDIR)/lib
| |
# Arguments for contrib/tvmop/compile.py: output directory and config file,
# plus the CUDA architecture flags when any are set.
TVM_OP_COMPILE_OPTIONS = -o $(ROOTDIR)/lib --config $(ROOTDIR)/lib/tvmop.conf
ifneq ($(CUDA_ARCH),)
  TVM_OP_COMPILE_OPTIONS += --cuda-arch "$(CUDA_ARCH)"
endif

# Compile the TVM operator library. Depends on the TVM runtime and on the
# Python sources under contrib/tvmop.
lib/libtvmop.so: lib/libtvm_runtime.so $(wildcard contrib/tvmop/*/*.py contrib/tvmop/*.py)
	echo "Compile TVM operators"
	@mkdir -p $(@D)
	PYTHONPATH=$(TVM_PATH)/python:$(TVM_PATH)/topi/python:$(ROOTDIR)/contrib \
	LD_LIBRARY_PATH=$(ROOTDIR)/lib \
	python3 $(ROOTDIR)/contrib/tvmop/compile.py $(TVM_OP_COMPILE_OPTIONS)
| |
# NNVM static library, rebuilt by NNVM's own makefile whenever one of its
# headers or sources changes. `$(MAKE) -C` replaces the `cd; $(MAKE); cd`
# chain, whose exit status was that of the trailing `cd` and therefore hid
# sub-make failures.
NNVM_INC = $(wildcard $(NNVM_PATH)/include/*/*.h)
NNVM_SRC = $(wildcard $(NNVM_PATH)/src/*/*/*.cc $(NNVM_PATH)/src/*/*.cc $(NNVM_PATH)/src/*.cc)
$(NNVM_PATH)/lib/libnnvm.a: $(NNVM_INC) $(NNVM_SRC)
	+$(MAKE) -C $(NNVM_PATH) lib/libnnvm.a DMLC_CORE_PATH=$(DMLC_CORE)
| |
# im2rec is built from its own source plus everything libmxnet is built from.
bin/im2rec: tools/im2rec.cc $(ALLX_DEP)

# Shared link recipe for every binary in $(BIN): the source, object and
# archive prerequisites are compiled and linked in one step.
$(BIN) :
	@mkdir -p $(@D)
	$(CXX) $(CFLAGS) -std=c++11 -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS)
| |
# C++ package: include its makefile and define the feature macro.
ifeq ($(USE_CPP_PACKAGE), 1)
  include cpp-package/cpp-package.mk
  CFLAGS += -DMXNET_USE_CPP_PACKAGE=1
endif

# MKL-DNN and C++ unit-test makefiles.
include mkldnn.mk
include tests/cpp/unittest.mk
| |
# Aggregate targets.
extra-packages: $(EXTRA_PACKAGES)

test: $(TEST)

lint: cpplint rcpplint jnilint pylint

# C++ lint via the dmlc-core lint script.
cpplint:
	3rdparty/dmlc-core/scripts/lint.py mxnet cpp include src plugin cpp-package tests \
	--exclude_path src/operator/contrib/ctc_include include/mkldnn

# Python lint; shared-library files are ignored by pattern.
pylint:
	python3 -m pylint --rcfile=$(ROOTDIR)/ci/other/pylintrc --ignore-patterns=".*\.so$$,.*\.dll$$,.*\.dylib$$" python/mxnet
| |
# MXNet extension dynamically loading libraries
EXT_LIBS = build/libcustomop_lib.so build/libtransposecsr_lib.so build/libtransposerowsp_lib.so build/libsubgraph_lib.so build/libpass_lib.so
ifeq ($(USE_CUDA), 1)
  EXT_LIBS += build/libcustomop_gpu_lib.so
endif
extension_libs: $(EXT_LIBS)

# Each example extension is one source file compiled together with
# src/lib_api.cc. The sources are listed as prerequisites so a library is
# rebuilt when they change, and every target has exactly one rule (duplicate
# definitions make GNU make warn about an overridden recipe).
build/libcustomop_lib.so: example/extensions/lib_custom_op/gemm_lib.cc src/lib_api.cc
	@mkdir -p $(@D)
	$(CXX) -shared -fPIC -std=c++11 $^ -o $@ -I include

build/libcustomop_gpu_lib.so: example/extensions/lib_custom_op/relu_lib.cu src/lib_api.cc
	@mkdir -p $(@D)
	$(NVCC) -shared -std=c++11 -Xcompiler -fPIC $^ -o $@ -I include

build/libsubgraph_lib.so: example/extensions/lib_subgraph/subgraph_lib.cc src/lib_api.cc
	@mkdir -p $(@D)
	$(CXX) -shared -fPIC -std=c++11 $^ -o $@ -I include

build/libtransposecsr_lib.so: example/extensions/lib_custom_op/transposecsr_lib.cc src/lib_api.cc
	@mkdir -p $(@D)
	$(CXX) -shared -fPIC -std=c++11 $^ -o $@ -I include

build/libtransposerowsp_lib.so: example/extensions/lib_custom_op/transposerowsp_lib.cc src/lib_api.cc
	@mkdir -p $(@D)
	$(CXX) -shared -fPIC -std=c++11 $^ -o $@ -I include

build/libpass_lib.so: example/extensions/lib_pass/pass_lib.cc src/lib_api.cc
	@mkdir -p $(@D)
	$(CXX) -shared -fPIC -std=c++11 $^ -o $@ -I include
| |
# Cython build. `&&` (rather than `;`) ensures setup.py is only run when the
# change into python/ succeeded.
cython:
	cd python && $(PYTHON) setup.py build_ext --inplace --with-cython

cython3:
	cd python && python3 setup.py build_ext --inplace --with-cython

# Remove the extension modules and C++ files matched under python/mxnet.
cyclean:
	rm -rf python/mxnet/*/*.so python/mxnet/*/*.cpp
| |
# Scala package targets. Each is a single Maven invocation run from
# scala-package/.
scalaclean:
	cd $(ROOTDIR)/scala-package && mvn clean

scalapkg:
	cd $(ROOTDIR)/scala-package && mvn install -DskipTests

scalainstall:
	cd $(ROOTDIR)/scala-package && mvn install

scalaunittest:
	cd $(ROOTDIR)/scala-package && mvn install

scalaintegrationtest:
	cd $(ROOTDIR)/scala-package && mvn integration-test -DskipTests=false

# Lint the JNI C++ sources, skipping the one excluded native header.
jnilint:
	3rdparty/dmlc-core/scripts/lint.py mxnet-jnicpp cpp scala-package/native/src \
	--exclude_path scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h
| |
# Remove the listed R-package files and directories and the mxnet_*.tar.gz
# archives.
rclean:
	$(RM) -r R-package/src/image_recordio.h R-package/NAMESPACE R-package/man \
	R-package/R/mxnet_generated.R R-package/inst \
	R-package/src/*.o R-package/src/*.so mxnet_*.tar.gz
| |
# Download and unpack Apache RAT. The steps are chained with && so a failed
# download is not followed by an unzip attempt.
build/rat/apache-rat-0.13/apache-rat-0.13.jar:
	mkdir -p build/rat
	cd build/rat && \
	wget http://mirror.metrocast.net/apache//creadur/apache-rat-0.13/apache-rat-0.13-bin.zip && \
	unzip apache-rat-0.13-bin.zip

# Run the RAT license check, print its report, and fail if the report contains
# the marker text for files without a valid license header.
#
# Shell variables and the command substitution are written with `$$` so that
# make passes them through to the shell instead of expanding them (to nothing)
# itself. Only POSIX sh constructs are used (`case` instead of `[[ =~ ]]`, no
# process substitution) because recipes run under /bin/sh by default.
ratcheck: build/rat/apache-rat-0.13/apache-rat-0.13.jar
	RAT_JAR=build/rat/apache-rat-0.13/apache-rat-0.13.jar; \
	ERROR_MESSAGE="Printing headers for text files without a valid license header"; \
	OUTPUT=$$(java -jar "$$RAT_JAR" -E tests/nightly/apache_rat_license_check/rat-excludes -d .); \
	printf '%s\n' "$$OUTPUT"; \
	echo "-------Process The Output-------"; \
	case "$$OUTPUT" in \
	  *"$$ERROR_MESSAGE"*) \
	    echo "ERROR: RAT Check detected files with unknown licenses. Please fix and run test again!"; \
	    exit 1 ;; \
	  *) \
	    echo "SUCCESS: There are no files with an Unknown License." ;; \
	esac
| |
| |
# Remove build products and clean the bundled sub-projects. The variant used
# when EXTRA_OPERATORS is set also removes deps/ and the .d/.o files of the
# extra operator directories; the other variant also runs mkldnn_clean and
# testclean.
#
# Sub-project cleans use `-$(MAKE) -C dir clean`: failures are still ignored
# (best effort), but a missing directory can no longer make `$(MAKE) clean`
# run in the current directory and recurse into this makefile, as it could
# with `cd dir; $(MAKE) clean; cd -`.
ifneq ($(EXTRA_OPERATORS),)
clean: rclean cyclean $(EXTRA_PACKAGES_CLEAN)
	$(RM) -r build lib bin deps *~ */*~ */*/*~ */*/*/*~
	(cd scala-package && mvn clean) || true
	-$(MAKE) -C $(DMLC_CORE) clean
	-$(MAKE) -C $(PS_PATH) clean
	-$(MAKE) -C $(NNVM_PATH) clean
	-$(MAKE) -C $(TVM_PATH) clean
	-$(MAKE) -C $(AMALGAMATION_PATH) clean
	$(RM) -r $(patsubst %, %/*.d, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.d, $(EXTRA_OPERATORS))
	$(RM) -r $(patsubst %, %/*.o, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.o, $(EXTRA_OPERATORS))
else
clean: rclean mkldnn_clean cyclean testclean $(EXTRA_PACKAGES_CLEAN)
	$(RM) -r build lib bin *~ */*~ */*/*~ */*/*/*~
	(cd scala-package && mvn clean) || true
	-$(MAKE) -C $(DMLC_CORE) clean
	-$(MAKE) -C $(PS_PATH) clean
	-$(MAKE) -C $(NNVM_PATH) clean
	-$(MAKE) -C $(TVM_PATH) clean
	-$(MAKE) -C $(AMALGAMATION_PATH) clean
endif

clean_all: clean
| |
# Pull in the dependency files written by earlier compiles, up to four
# directory levels below build/. The leading `-` keeps make quiet when none
# exist yet.
-include build/*.d build/*/*.d build/*/*/*.d build/*/*/*/*.d
# Dependency files of extra operators live next to their sources.
ifneq ($(EXTRA_OPERATORS),)
  -include $(addsuffix /*.d,$(EXTRA_OPERATORS)) $(addsuffix /*/*.d,$(EXTRA_OPERATORS))
endif