| #!/usr/bin/env python3 |
| # -*- coding: utf-8 -*- |
| |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| """Multi arch dockerized build tool.""" |
| |
| __author__ = 'Marco de Abreu, Kellen Sunderland, Anton Chernov, Pedro Larroy, Leonard Lausen' |
| __version__ = '0.4' |
| |
| import argparse |
| import pprint |
| import os |
| import signal |
| import subprocess |
| from itertools import chain |
| from subprocess import check_call |
| from typing import * |
| |
| import yaml |
| |
| from util import * |
| |
| |
def get_platforms() -> List[str]:
    """Return the names of the services declared in docker/docker-compose.yml.

    Each service of the compose file is treated as one platform. The path is
    resolved relative to the current working directory.
    """
    with open("docker/docker-compose.yml", "r") as compose_file:
        raw_yaml = compose_file.read()
    services = yaml.load(raw_yaml, yaml.SafeLoader)["services"]
    return list(services)
| |
def get_docker_tag(platform: str, registry: str) -> str:
    """Look up the image name of a platform in docker/docker-compose.yml.

    :param platform: service name inside the compose file
    :param registry: value substituted for the ${DOCKER_CACHE_REGISTRY} placeholder
    :return: docker tag to be used for the container
    """
    with open("docker/docker-compose.yml", "r") as compose_file:
        raw_yaml = compose_file.read()
    config = yaml.load(raw_yaml, yaml.SafeLoader)
    image_template = config["services"][platform]["image"]
    return image_template.replace('${DOCKER_CACHE_REGISTRY}', registry)
| |
def build_docker(platform: str, registry: str, num_retries: int, no_cache: bool,
                 cache_intermediate: bool = False) -> None:
    """
    Build the container image for the given platform with docker-compose.

    :param platform: Platform, i.e. the service name in docker/docker-compose.yml
    :param registry: Dockerhub registry name; exported to docker-compose as
        the DOCKER_CACHE_REGISTRY environment variable
    :param num_retries: Number of tries handed to the retry decorator
    :param no_cache: pass --no-cache to docker-compose to rebuild the images
    :param cache_intermediate: pass --no-rm to docker-compose so intermediate
        containers are kept after the build
    :return: None; the image is identified by its compose tag (see get_docker_tag)
    """
    logging.info('Building docker container \'%s\' based on ci/docker/docker-compose.yml', platform)
    # We add a user with the same id as the executing non-root user so files created in the
    # container match permissions of the local user. Same for the group.
    cmd = ['docker-compose', '-f', 'docker/docker-compose.yml', 'build',
           "--build-arg", "USER_ID={}".format(os.getuid()),
           "--build-arg", "GROUP_ID={}".format(os.getgid())]
    if no_cache:
        # Previously this flag was accepted but silently ignored.
        cmd.append('--no-cache')
    if cache_intermediate:
        cmd.append('--no-rm')
    # Options must precede the service name.
    cmd.append(platform)

    # docker-compose.yml references ${DOCKER_CACHE_REGISTRY}; provide it via the environment.
    env = os.environ.copy()
    env["DOCKER_CACHE_REGISTRY"] = registry

    @retry(subprocess.CalledProcessError, tries=num_retries)
    def run_cmd(env=None):
        logging.info("Running command: '%s'", ' '.join(cmd))
        check_call(cmd, env=env)

    run_cmd(env=env)
| |
| |
def buildir() -> str:
    """:return: path of the "build" directory below the MXNet root"""
    mxnet_root = get_mxnet_root()
    return os.path.join(mxnet_root, "build")
| |
| |
def default_ccache_dir() -> str:
    """Pick the host directory that is shared with containers as ccache storage.

    Preference order: the CCACHE_DIR environment variable (created if missing),
    then "/tmp/_mxnet_ccache" on macOS, then "~/.ccache".

    :return: ccache directory for the current platform
    """
    import platform

    requested = os.environ.get('CCACHE_DIR')
    if requested is not None:
        ccache_dir = os.path.realpath(requested)
        try:
            os.makedirs(ccache_dir, exist_ok=True)
        except PermissionError:
            logging.info('Unable to make dirs at %s, falling back to local temp dir', ccache_dir)
        else:
            return ccache_dir

    if platform.system() != 'Darwin':
        return os.path.join(os.path.expanduser("~"), ".ccache")

    # In osx tmpdir is not mountable by default, so use a fixed /tmp location
    ccache_dir = "/tmp/_mxnet_ccache"
    os.makedirs(ccache_dir, exist_ok=True)
    return ccache_dir
| |
| |
def container_run(platform: str,
                  nvidia_runtime: bool,
                  docker_registry: str,
                  shared_memory_size: str,
                  local_ccache_dir: str,
                  command: List[str],
                  environment: Dict[str, str],
                  dry_run: bool = False) -> int:
    """Run command in a container

    :param platform: Platform whose image (see get_docker_tag) is started
    :param nvidia_runtime: request GPU access; `--gpus all` is tried first and
        `--runtime nvidia` is used when docker exits with status 125
    :param docker_registry: registry name used to resolve the image tag
    :param shared_memory_size: value for docker's --shm-size
    :param local_ccache_dir: host directory mounted at /work/ccache
    :param command: command and arguments executed inside the container
    :param environment: environment variables for the container; the caller's
        dict is not modified
    :param dry_run: only print the docker run command instead of executing it
    :return: 0; a failing docker invocation raises instead
    :raises subprocess.CalledProcessError: when docker run exits non-zero
    """
    # Work on a copy so the defaults below do not leak into the caller's dict.
    environment = dict(environment)
    # set default environment variables
    environment.update({
        'CCACHE_MAXSIZE': '500G',
        'CCACHE_TEMPDIR': '/tmp/ccache',  # temp dir should be local and not shared
        'CCACHE_DIR': '/work/ccache',  # this path is inside the container as /work/ccache is mounted
        'CCACHE_LOGFILE': '/tmp/ccache.log',  # a container-scoped log, useful for ccache verification.
    })
    # The host may override the cache size limit.
    environment.update({k: os.environ[k] for k in ['CCACHE_MAXSIZE'] if k in os.environ})
    # whether this is a release build or not
    environment.setdefault('RELEASE_BUILD', 'false')

    tag = get_docker_tag(platform=platform, registry=docker_registry)
    mx_root = get_mxnet_root()
    local_build_folder = buildir()
    # We need to create it first, otherwise it will be created by the docker daemon with root only permissions
    os.makedirs(local_build_folder, exist_ok=True)
    os.makedirs(local_ccache_dir, exist_ok=True)
    logging.info("Using ccache directory: %s", local_ccache_dir)

    # Log environment
    logging.info("environment ---> %s", environment)

    # Keep the well-known variables first, then forward everything else the
    # caller asked for. Previously only the five fixed variables reached the
    # container and user supplied ones were silently dropped.
    fixed_keys = ['CCACHE_MAXSIZE', 'CCACHE_TEMPDIR', 'CCACHE_DIR', 'CCACHE_LOGFILE', 'RELEASE_BUILD']
    forwarded_keys = fixed_keys + [k for k in environment if k not in fixed_keys]
    env_args = []
    for key in forwarded_keys:
        env_args += ['-e', '{}={}'.format(key, environment[key])]

    # Build docker command
    docker_arg_list = [
        "--cap-add", "SYS_PTRACE",  # Required by ASAN
        '--rm',
        '--shm-size={}'.format(shared_memory_size),
        # mount mxnet root
        '-v', "{}:/work/mxnet".format(mx_root),
        # mount mxnet/build for storing build
        '-v', "{}:/work/build".format(local_build_folder),
        '-v', "{}:/work/ccache".format(local_ccache_dir),
        '-u', '{}:{}'.format(os.getuid(), os.getgid()),
    ]
    docker_arg_list += env_args
    docker_arg_list += [tag]
    docker_arg_list.extend(command)

    plain_cmd = ['docker', 'run'] + docker_arg_list
    gpu_cmd = ['docker', 'run', '--gpus', 'all'] + docker_arg_list

    if dry_run:
        # Show the command for manual inspection; nothing is executed.
        print(' \\\n\t'.join(gpu_cmd if nvidia_runtime else plain_cmd))
        return 0

    def docker_run_cmd(cmd):
        logging.info("Running %s in container %s", command, tag)
        logging.info("Executing command:\n%s\n", ' \\\n\t'.join(cmd))
        subprocess.run(cmd, stdout=sys.stdout, stderr=sys.stderr, check=True)

    if not nvidia_runtime:
        docker_run_cmd(plain_cmd)
        return 0

    try:
        docker_run_cmd(gpu_cmd)
    except subprocess.CalledProcessError as e:
        # Status 125 is retried with the nvidia runtime (presumably a docker
        # that does not accept --gpus); any other failure is a real error.
        if e.returncode != 125:
            raise
        docker_run_cmd(['docker', 'run', '--runtime', 'nvidia'] + docker_arg_list)

    return 0
| |
| |
def list_platforms() -> str:
    """:return: printable, newline separated listing of the supported platforms"""
    platform_lines = '\n'.join(get_platforms())
    return "\nSupported platforms:\n" + platform_lines
| |
| |
def load_docker_cache(platform, tag, docker_registry) -> None:
    """Pull the image of a platform from the given docker registry.

    :param platform: service name handed to `docker-compose pull`
    :param tag: not used by this function
    :param docker_registry: registry exported as DOCKER_CACHE_REGISTRY; a falsy
        value disables the pull
    """
    if not docker_registry:
        logging.info('Distributed docker cache disabled')
        return

    pull_cmd = ['docker-compose', '-f', 'docker/docker-compose.yml', 'pull', platform]
    pull_env = dict(os.environ, DOCKER_CACHE_REGISTRY=docker_registry)
    logging.info("Running command: 'DOCKER_CACHE_REGISTRY=%s %s'", docker_registry, ' '.join(pull_cmd))
    check_call(pull_cmd, env=pull_env)
| |
| |
def log_environment():
    """Log EC2 instance information (when any is reported) and, at debug
    level, the complete process environment."""
    ec2_info = ec2_instance_info()
    if ec2_info:
        logging.info("EC2: %s", ec2_info)
    formatted_env = pprint.pformat(dict(os.environ), indent=4)
    logging.debug("Build environment: %s", formatted_env)
| |
| |
# Usage examples printed together with the help text when no platform is given.
_USAGE_EXAMPLES = """
Examples:

./build.py -p armv7

    Will build a docker container with cross compilation tools and build MXNet for armv7 by
    running: ci/docker/runtime_functions.sh build_armv7 inside the container.

./build.py -p armv7 ls

    Will execute the given command inside the armv7 container

./build.py -p armv7 --print-docker-run

    Will print a docker run command to get inside the container in a shell

"""


def _parse_environment(entries: List[str]) -> Dict[str, str]:
    """Convert NAME or NAME=VALUE entries into a dict; bare names are read from os.environ."""
    parsed = {}
    for entry in entries:
        # Split on the first '=' only so values may themselves contain '='.
        name, separator, value = entry.partition('=')
        if not separator:
            value = os.environ[name]  # KeyError when the host does not define it
        parsed[name] = value
    return parsed


def main() -> int:
    """Command line entry point: build a platform's container and run a command in it.

    Depending on the arguments this lists the platforms, builds the docker
    image, and then runs either the given command, a dry run that prints the
    docker command, or the platform's default build function.

    :return: process exit status (0 on success)
    """
    config_logging()

    logging.info("MXNet container based build tool.")
    log_environment()
    chdir_to_script_directory()

    parser = argparse.ArgumentParser(
        description="Utility for building and testing MXNet on docker containers", epilog="")
    parser.add_argument("-p", "--platform", type=str,
                        help="Platform. See ci/docker/docker-compose.yml for list of supported "
                             "platforms (services).")

    parser.add_argument("-b", "--build-only",
                        help="Only build the container, don't build the project",
                        action='store_true')

    parser.add_argument("-R", "--run-only",
                        help="Only run the container, don't rebuild the container",
                        action='store_true')

    parser.add_argument("-n", "--nvidiadocker",
                        help="Use nvidia docker",
                        action='store_true')

    parser.add_argument("--shm-size",
                        help="Size of the shared memory /dev/shm allocated in the container (e.g '1g')",
                        default='500m',
                        dest="shared_memory_size")

    parser.add_argument("-l", "--list",
                        help="List platforms",
                        action='store_true')

    parser.add_argument("--print-docker-run",
                        help="print docker run command for manual inspection",
                        action='store_true')

    parser.add_argument("-d", "--docker-registry",
                        help="Dockerhub registry name to retrieve cache from.",
                        default='mxnetci',
                        type=str)

    parser.add_argument("-r", "--docker-build-retries",
                        help="Number of times to retry building the docker image. Default is 1",
                        default=1,
                        type=int)

    parser.add_argument("--no-pull", action="store_true",
                        help="Don't pull from dockerhub registry to initialize cache.")

    parser.add_argument("--no-cache", action="store_true",
                        help="passes --no-cache to docker build")

    parser.add_argument("--cache-intermediate", action="store_true",
                        help="passes --rm=false to docker build")

    parser.add_argument("-e", "--environment", nargs="*", default=[],
                        help="Environment variables for the docker container. "
                             "Specify with a list containing either names or name=value")

    parser.add_argument("command",
                        help="command to run in the container",
                        nargs='*', action='append', type=str)

    parser.add_argument("--ccache-dir",
                        default=default_ccache_dir(),
                        help="ccache directory",
                        type=str)

    args = parser.parse_args()

    # action='append' combined with nargs='*' yields a list of lists; flatten it.
    command = list(chain.from_iterable(args.command))
    try:
        environment = _parse_environment(args.environment)
    except KeyError as missing:
        # Report an unset host variable as a usage error instead of a traceback.
        parser.error("environment variable {} is not set on the host; "
                     "pass it as NAME=VALUE".format(missing))

    if args.list:
        print(list_platforms())
    elif args.platform:
        platform = args.platform
        tag = get_docker_tag(platform=platform, registry=args.docker_registry)
        if args.docker_registry and not args.no_pull:
            load_docker_cache(platform=platform, tag=tag, docker_registry=args.docker_registry)
        if not args.run_only:
            build_docker(platform=platform, registry=args.docker_registry,
                         num_retries=args.docker_build_retries,
                         no_cache=args.no_cache, cache_intermediate=args.cache_intermediate)
        else:
            logging.info("Skipping docker build step.")

        if args.build_only:
            logging.warning("Container was just built. Exiting due to build-only.")
            return 0

        dry_run = False
        if not command:
            if args.print_docker_run:
                # Only show the docker run command; an explicit command takes precedence.
                dry_run = True
            else:
                # With no commands, execute a build function for the target platform
                command = ["/work/mxnet/ci/docker/runtime_functions.sh", "build_{}".format(platform)]
                logging.info("No command specified, trying default build: %s", ' '.join(command))

        ret = container_run(
            platform=platform, nvidia_runtime=args.nvidiadocker,
            shared_memory_size=args.shared_memory_size, command=command,
            docker_registry=args.docker_registry,
            local_ccache_dir=args.ccache_dir, dry_run=dry_run, environment=environment)

        if ret != 0:
            logging.critical("Execution of %s failed with status: %d", command, ret)
            return ret

    else:
        parser.print_help()
        # The listing was previously computed but never shown.
        print(list_platforms())
        print(_USAGE_EXAMPLES)

    return 0
| |
| |
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())