#!/usr/bin/env python
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
################################################################################
# This script makes Kudu release binaries relocatable for easy use by
# integration tests using a mini cluster. The resulting binaries should never
# be deployed to run an actual Kudu service, whether in production or
# development, because all security dependencies are copied from the build
# system and will not be updated if the operating system on the runtime host is
# patched.
################################################################################

import errno
import logging
import optparse
import os
import os.path
import re
import shutil
import subprocess
import sys

# Path two directory levels above the directory containing this script. It is
# used below to locate the "build-support" directory and "version.txt".
SOURCE_ROOT = os.path.join(os.path.dirname(__file__), "../..")
# Add the build-support dir to the module search path so that the kudu_util
# module (and presumably dep_extract) imported below can be found.
sys.path.append(os.path.join(SOURCE_ROOT, "build-support"))

from kudu_util import check_output, Colors, init_logging
from dep_extract import DependencyExtractor

# Constants.
# Load command types, compared against the value of the 'cmd' field in the
# `otool -l` output by parse_load_commands_macos().
LC_RPATH = 'LC_RPATH'
LC_LOAD_DYLIB = 'LC_LOAD_DYLIB'
# Field names looked up at the start of each `otool -l` output line.
KEY_CMD = 'cmd'
KEY_NAME = 'name'
KEY_PATH = 'path'

# Matched against the entries of a candidate sasl2 module directory in
# relocate_sasl2() to confirm that the directory contains the 'plain' module.
PAT_SASL_LIBPLAIN = re.compile(r'libplain')

# Exclude libraries that are (L)GPL-licensed and libraries that are not
# portable across Linux kernel versions. One exception is 'libpcre', which
# is BSD-licensed. It is excluded because it is a transitive dependency
# introduced by 'libselinux'.
# The pattern is compiled with re.VERBOSE, so the whitespace and newlines
# inside it are ignored. relocate_deps_linux() applies it with search() to
# each candidate library path; a match means the library is not copied.
PAT_LINUX_LIB_EXCLUDE = re.compile(r"""(libpthread|
                                        libc|
                                        libstdc\+\+|
                                        librt|
                                        libdl|
                                        libresolv|
                                        libgcc.*|
                                        libcrypt|
                                        libm|
                                        libkeyutils|
                                        libcom_err|
                                        libdb-[\d.]+|
                                        libselinux|
                                        libpcre|
                                        libtinfo
                                       )\.so""", re.VERBOSE)

# We don't want to ship libSystem because it includes kernel and thread
# routines that we assume may not be portable between macOS versions.
# Also do not ship core libraries that come with the default macOS install
# unless we know that we need to for ABI reasons.
# The pattern is compiled with re.VERBOSE, so the whitespace and newlines
# inside it are ignored. relocate_deps_macos() applies it with search() to the
# dependency name as recorded in the binary's load commands; alternatives
# ending in '$' only match at the very end of that name.
PAT_MACOS_LIB_EXCLUDE = re.compile(r"""(AppleFSCompression$|
                                        CFNetwork$|
                                        CoreFoundation$|
                                        CoreServices$|
                                        DiskArbitration$|
                                        IOKit$|
                                        Foundation$|
                                        Kerberos$|
                                        Security$|
                                        SystemConfiguration$|
                                        libCRFSuite|
                                        libDiagnosticMessagesClient|
                                        libSystem|
                                        libapple_nghttp2|
                                        libarchive|
                                        libc\+\+\.|
                                        libenergytrace|
                                        libicucore|
                                        libncurses|
                                        libnetwork|
                                        libobjc|
                                        libresolv|
                                        libsasl2|
                                        libxar|
                                        libz
                                       )""",
                                       re.VERBOSE)

# Config keys.
# These are the keys of the path configuration map built by mkconfig().
BUILD_ROOT = 'build_root'
BUILD_BIN_DIR = 'build_bin_dir'
ARTIFACT_ROOT = 'artifact_root'
ARTIFACT_BIN_DIR = 'artifact_bin_dir'
ARTIFACT_LIB_DIR = 'artifact_lib_dir'

# Platform flags, derived from the first field returned by os.uname().
IS_MACOS = os.uname()[0] == "Darwin"
IS_LINUX = os.uname()[0] == "Linux"

def check_for_command(command):
  """
  Verify that 'command' can be located on the PATH by running `which`.
  If the lookup fails with subprocess.CalledProcessError, the failure is
  logged and the exception is re-raised to the caller.
  """
  lookup = ['which', command]
  try:
    check_output(lookup)
  except subprocess.CalledProcessError:
    logging.error("Unable to find %s command", command)
    raise

def dump_load_commands_macos(binary_path):
  """
  Dump the Mach-O load commands of 'binary_path' by running `otool -l`.
  Returns the output as a list of text lines, one output line per entry.
  'otool -l' is used rather than 'objdump -p' because 'otool' supports
  Universal Mach-O binaries.
  Logs and re-raises subprocess.CalledProcessError if otool fails.
  """
  check_for_command('otool')
  otool_cmd = ["otool", "-l", binary_path]
  try:
    raw_output = check_output(otool_cmd)
  except subprocess.CalledProcessError as err:
    logging.error("Failed to run %s", err.cmd)
    raise
  text = raw_output.strip().decode("utf-8")
  return text.split("\n")

def parse_load_commands_macos(cmd_type, dump):
  """
  Parses the output from dump_load_commands_macos() for macOS.
  'cmd_type' must be one of the following:
  * LC_RPATH: Returns a list containing the rpath search path, with one
    search path per entry.
  * LC_LOAD_DYLIB: Returns a list of shared object library dependencies, with
    one shared object per entry. They are returned as stored in the MachO
    header, without being first resolved to an absolute path, and may look
    like: @rpath/Foo.framework/Versions/A/Foo
  'dump' is the output from dump_load_commands_macos(): an iterable of lines,
  each either a text string or UTF-8 encoded bytes.
  Each returned value has the trailing ' (offset N)' annotation removed and
  may contain embedded whitespace. A matching 'path' or 'name' line that
  carries no value contributes None.
  """
  # Parsing state enum values.
  PARSING_NONE = 0
  PARSING_NEW_RECORD = 1
  PARSING_RPATH = 2
  PARSING_LIB_PATHS = 3

  # otool prints ' (offset N)' after the string payload of a 'path' or 'name'
  # field. Stripping that suffix, rather than cutting the value at the first
  # whitespace, keeps paths that contain spaces intact.
  offset_suffix = re.compile(r'\s+\(offset \d+\)$')

  state = PARSING_NONE
  values = []
  for line in dump:
    # Ensure the line is a string-like object.
    try:
      line = line.decode('utf-8')
    except (UnicodeDecodeError, AttributeError):
      pass
    if line.startswith('Load command'):
      state = PARSING_NEW_RECORD
      continue

    # Split the line into its field name and the remainder of the line.
    fields = line.strip().split(None, 1)
    key = fields[0] if fields else ''
    rest = fields[1] if len(fields) > 1 else None

    # The first 'cmd' line of a record decides whether the record is relevant.
    if state == PARSING_NEW_RECORD and key == KEY_CMD:
      cmd = rest.split(None, 1)[0] if rest else None
      if cmd == LC_RPATH:
        state = PARSING_RPATH
        continue
      if cmd == LC_LOAD_DYLIB:
        state = PARSING_LIB_PATHS
        continue

    wanted = ((state == PARSING_RPATH and cmd_type == LC_RPATH
               and key == KEY_PATH) or
              (state == PARSING_LIB_PATHS and cmd_type == LC_LOAD_DYLIB
               and key == KEY_NAME))
    if wanted:
      values.append(offset_suffix.sub('', rest) if rest is not None else None)
  return values

def get_rpaths_macos(binary_path):
  """
  Convenience wrapper: dump the load commands of 'binary_path' and return the
  list of rpath entries parsed from them.
  """
  return parse_load_commands_macos(LC_RPATH,
                                   dump_load_commands_macos(binary_path))

def resolve_library_paths_macos(raw_library_paths, rpaths):
  """
  Resolve the library paths from parse_load_commands_macos(LC_LOAD_DYLIB, ...) to
  filesystem paths using the rpath information returned from
  get_rpaths_macos().
  Paths that do not start with '@rpath' are mapped to themselves. For a path
  that does, each entry of 'rpaths' is tried in order and the first candidate
  that exists on disk is used.
  Returns a mapping from original to resolved library paths.
  Raises IOError with errno ENOENT if an '@rpath' library cannot be found
  under any of the given rpaths.
  """
  RPATH_PREFIX = "@rpath"
  resolved_paths = {}
  for raw_lib_path in raw_library_paths:
    if not raw_lib_path.startswith(RPATH_PREFIX):
      resolved_paths[raw_lib_path] = raw_lib_path
      continue
    # Substitute only the leading '@rpath' using plain string concatenation.
    # A regex substitution would interpret backslashes in the rpath as escape
    # sequences or group references.
    suffix = raw_lib_path[len(RPATH_PREFIX):]
    for rpath in rpaths:
      candidate = rpath + suffix
      if os.path.exists(candidate):
        resolved_paths[raw_lib_path] = candidate
        break
    else:
      # No rpath entry produced an existing file.
      raise IOError(errno.ENOENT, "Unable to locate library %s in rpath %s" % (raw_lib_path, rpaths))
  return resolved_paths

def get_resolved_dep_library_paths_macos(binary_path):
  """
  Returns a map from each library dependency name recorded in 'binary_path'
  to the path it resolves to. See resolve_library_paths_macos().
  """
  # A single otool dump feeds both parses.
  dump = dump_load_commands_macos(binary_path)
  rpaths = parse_load_commands_macos(LC_RPATH, dump)
  raw_deps = parse_load_commands_macos(LC_LOAD_DYLIB, dump)
  return resolve_library_paths_macos(raw_deps, rpaths)

def get_artifact_name():
  """
  Return the artifact name, which is composed of the version (the first line
  of version.txt under SOURCE_ROOT), the OS, and the machine architecture.
  Raises NotImplementedError on platforms other than Linux and macOS.
  """
  if not (IS_LINUX or IS_MACOS):
    raise NotImplementedError("Unsupported platform")
  os_str = "linux" if IS_LINUX else "osx"
  arch = os.uname()[4]

  version_path = os.path.join(SOURCE_ROOT, "version.txt")
  with open(version_path, 'r') as version_file:
    first_line = version_file.readline()
  # Ensure the line is a string-like object.
  try:
    first_line = first_line.decode('utf-8')
  except (UnicodeDecodeError, AttributeError):
    pass
  return "kudu-binary-%s-%s-%s" % (first_line.strip(), os_str, arch)

def mkconfig(build_root, artifact_root):
  """
  Return a map of the paths used throughout the script: the build root and
  its "bin" directory, plus the artifact root and its "bin" and "lib"
  directories.
  """
  return {
    BUILD_ROOT: build_root,
    BUILD_BIN_DIR: os.path.join(build_root, "bin"),
    ARTIFACT_ROOT: artifact_root,
    ARTIFACT_BIN_DIR: os.path.join(artifact_root, "bin"),
    ARTIFACT_LIB_DIR: os.path.join(artifact_root, "lib"),
  }

def prep_artifact_dirs(config):
  """
  Make sure the artifact root, bin, and lib directories named in 'config'
  exist, creating (with mode 0755) any that are missing.
  """
  for key in (ARTIFACT_ROOT, ARTIFACT_BIN_DIR, ARTIFACT_LIB_DIR):
    if os.path.exists(config[key]):
      continue
    os.makedirs(config[key], mode=0o755)

def copy_file(src, dest):
  """
  Copy the file with path 'src' to path 'dest', including its permission bits.
  If 'src' is a symlink, the link will be followed and 'dest' will be written
  as a plain file.
  The copy is always left writable by its owner, because the relocation steps
  in this script (chrpath, install_name_tool) modify the copied file in place.
  """
  shutil.copyfile(src, dest)
  # shutil.copyfile() copies only the contents, which would drop the
  # executable bit from copied binaries. Carry over the permission bits of the
  # source (os.stat follows symlinks) and add the owner-write bit.
  src_mode = os.stat(src).st_mode & 0o7777
  os.chmod(dest, src_mode | 0o200)

def copy_file_preserve_links(src, dest):
  """
  Like copy_file(), except that when 'src' is a symlink a new symlink with the
  same link target is created at 'dest' instead of copying the file it points
  to.
  """
  if os.path.islink(src):
    # Recreate the link verbatim; the target string is not resolved.
    os.symlink(os.readlink(src), dest)
  else:
    copy_file(src, dest)

def chrpath(target, new_rpath):
  """
  Replace the RPATH or RUNPATH of 'target' with 'new_rpath' using the
  chrpath(1) tool.
  If `chrpath -l` fails for the target, this is treated as "no RPATH or
  RUNPATH set": a warning is logged and the target is left unchanged.
  A failure of the actual replacement is logged and re-raised as
  subprocess.CalledProcessError.
  """
  # Probe first; continue with a warning if no rpath is set on the binary.
  try:
    subprocess.check_call(['chrpath', '-l', target])
  except subprocess.CalledProcessError:
    logging.warning("No RPATH or RUNPATH set on target %s, continuing...", target)
    return

  # Update the rpath.
  replace_cmd = ['chrpath', '-r', new_rpath, target]
  try:
    subprocess.check_call(replace_cmd)
  except subprocess.CalledProcessError:
    logging.warning("Failed to chrpath for target %s", target)
    raise

def get_resolved_deps(target):
  """
  Return a list of resolved library dependencies for the given target.
  On Linux this is the result of DependencyExtractor.extract_deps()
  (presumably a list of paths; confirm against dep_extract). On macOS it is
  the list of resolved paths from get_resolved_dep_library_paths_macos().
  Raises NotImplementedError on any other platform.
  """
  if IS_LINUX:
    return DependencyExtractor().extract_deps(target)
  if IS_MACOS:
    # dict.values() is a view object on Python 3; materialize it so that the
    # caller gets a real list, as documented.
    return list(get_resolved_dep_library_paths_macos(target).values())
  raise NotImplementedError("not implemented")

def relocate_deps_linux(target_src, target_dst, config):
  """
  Linux implementation of relocate_deps(): copy the library dependencies of
  'target_src' that are not excluded by PAT_LINUX_LIB_EXCLUDE into the
  artifact lib directory, and point the rpath of each copied library and of
  'target_dst' at '$ORIGIN/../lib'.
  """
  NEW_RPATH = '$ORIGIN/../lib'

  # The chrpath command must be available in the Linux build.
  check_for_command('chrpath')

  def _should_ship(path):
    # Keep only the libraries that do not match the exclusion pattern.
    return not PAT_LINUX_LIB_EXCLUDE.search(path)

  # Copy the linked libraries.
  extractor = DependencyExtractor()
  extractor.set_library_filter(_should_ship)
  for lib_src in extractor.extract_deps(target_src):
    lib_name = os.path.basename(lib_src)
    lib_dst = os.path.join(config[ARTIFACT_LIB_DIR], lib_name)
    copy_file(lib_src, lib_dst)
    # The shared objects need their RUNPATH set as well so that transitive
    # dependencies are properly resolved. $ORIGIN is always relative to the
    # running executable.
    chrpath(lib_dst, NEW_RPATH)

  # Finally, make the executable itself look for its dependencies in a
  # relative location.
  chrpath(target_dst, NEW_RPATH)

def fix_rpath_macos(target_dst):
  """
  Remove every rpath entry currently recorded in 'target_dst' and add a single
  '@executable_path/../lib' entry, using install_name_tool.
  """
  check_for_command('install_name_tool')
  for old_rpath in get_rpaths_macos(target_dst):
    delete_cmd = ['install_name_tool', '-delete_rpath', old_rpath, target_dst]
    subprocess.check_call(delete_cmd)
  add_cmd = ['install_name_tool', '-add_rpath', '@executable_path/../lib',
             target_dst]
  subprocess.check_call(add_cmd)

def relocate_dep_path_macos(target_dst, dep_search_name):
  """
  Rewrite the dependency 'dep_search_name' recorded in the binary
  'target_dst' so that its directory portion (everything up to and including
  the last '/') becomes '@rpath/'.
  """
  rpath_relative_name = re.sub('^.*/', '@rpath/', dep_search_name)
  change_cmd = ['install_name_tool', '-change',
                dep_search_name, rpath_relative_name, target_dst]
  subprocess.check_call(change_cmd)

def relocate_deps_macos(target_src, target_dst, config):
  """
  macOS implementation of relocate_deps(): reset the rpath of 'target_dst',
  rewrite each dependency that is not excluded to be '@rpath'-relative, and
  copy those dependencies into the artifact lib directory, recursing into
  each library the first time it is copied.
  """
  resolved_deps = get_resolved_dep_library_paths_macos(target_src)

  check_for_command('install_name_tool')

  # Modify the rpath of the target.
  fix_rpath_macos(target_dst)

  for search_name, resolved_src in resolved_deps.items():
    # Skip the libs we don't want to archive.
    if PAT_MACOS_LIB_EXCLUDE.search(search_name):
      continue

    # Point 'target_dst' at the relocated copy of this dependency.
    relocate_dep_path_macos(target_dst, search_name)

    # Ship the dependency itself unless an earlier call already archived it.
    archived = os.path.join(config[ARTIFACT_LIB_DIR],
                            os.path.basename(resolved_src))
    if os.path.isfile(archived):
      continue
    # Recursively copy and relocate library dependencies as they are found.
    copy_file(resolved_src, archived)
    relocate_deps_macos(resolved_src, archived, config)

def relocate_deps(target_src, target_dst, config):
  """
  Make 'target_dst' relocatable and copy all of the dependencies of
  'target_src' into the artifact directory, dispatching to the
  platform-specific implementation.
  Raises NotImplementedError on platforms other than Linux and macOS.
  """
  if IS_LINUX:
    platform_impl = relocate_deps_linux
  elif IS_MACOS:
    platform_impl = relocate_deps_macos
  else:
    raise NotImplementedError("Unsupported platform")
  return platform_impl(target_src, target_dst, config)

def relocate_sasl2(target_src, config):
  """
  Relocate the dynamically loaded sasl2 modules into the artifact's
  'lib/sasl2' directory.
  Returns False if the modules could not be found.
  Returns True if the modules were found and relocated.
  Raises an error if there is a problem during relocation of the sasl2 modules.
  """
  # Find the libsasl2 library among the dependencies of the target.
  sasl_lib = next((dep for dep in get_resolved_deps(target_src)
                   if re.search('libsasl2', dep)), None)
  if not sasl_lib:
    return False

  # The modules are expected in a 'sasl2' directory next to libsasl2. Only
  # accept it if it contains libplain, which is required for Kudu's basic
  # operation.
  sasl_path = os.path.join(os.path.dirname(sasl_lib), "sasl2")
  if not os.path.exists(sasl_path):
    return False
  if not any(PAT_SASL_LIBPLAIN.search(child) for child in os.listdir(sasl_path)):
    return False

  dest_dir = os.path.join(config[ARTIFACT_LIB_DIR], 'sasl2')
  os.mkdir(dest_dir)

  # Every file found under the module directory is placed directly in
  # 'dest_dir'; symlinks are recreated as links and are not relocated.
  for dirpath, _, files in os.walk(sasl_path):
    for name in files:
      file_src = os.path.join(dirpath, name)
      file_dst = os.path.join(dest_dir, name)
      copy_file_preserve_links(file_src, file_dst)
      if not os.path.islink(file_src):
        relocate_deps(file_src, file_dst, config)

  return True

def main():
  """
  Script entry point. Expects the Kudu build directory followed by one or
  more target names on the command line, recreates the artifact directory
  under the build directory, and copies each target into it together with
  its relocated dependencies.
  Exits with status 1 on a usage error or if the build directory is missing.
  """
  if len(sys.argv) < 3:
    print("Usage: %s kudu_build_dir target [target ...]" % (sys.argv[0], ))
    sys.exit(1)

  # Command-line arguments.
  build_root = sys.argv[1]
  targets = sys.argv[2:]

  init_logging()

  if not os.path.exists(build_root):
    logging.error("Build directory %s does not exist", build_root)
    sys.exit(1)

  artifact_root = os.path.join(build_root, get_artifact_name())
  config = mkconfig(build_root, artifact_root)

  # Start from an empty artifact root to ensure a clean build.
  if os.path.exists(artifact_root):
    shutil.rmtree(artifact_root)
  prep_artifact_dirs(config)

  bin_src_dir = config[BUILD_BIN_DIR]
  bin_dst_dir = config[ARTIFACT_BIN_DIR]
  sasl_done = False
  for target in targets:
    logging.info("Including target '%s' and its dependencies in archive...", target)
    # Copy the target into the artifact directory.
    target_src = os.path.join(bin_src_dir, target)
    target_dst = os.path.join(bin_dst_dir, target)
    copy_file(target_src, target_dst)

    # sasl2 is only relocated on Linux because macOS appears to ship sasl2
    # with the default distribution and we've observed ABI compatibility
    # issues involving calls from libsasl2 into libSystem when shipping
    # libsasl2 with the binary artifact. The attempt is repeated for each
    # target until it succeeds once.
    if IS_LINUX and not sasl_done:
      logging.info("Attempting to relocate sasl2 modules...")
      sasl_done = relocate_sasl2(target_src, config)

    # Make the target relocatable and copy all of its dependencies into the
    # artifact directory.
    relocate_deps(target_src, target_dst, config)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
  main()
