#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
import os
import re
import subprocess
# Matches the output lines from the 'ldd' tool. For example:
#   libcrypto.so.10 => /path/to/usr/lib64/libcrypto.so.10 (0x00007fb0cb0a5000)
# The single capture group is the resolved library path to the right of '=>'.
#
# Note: The following pattern will not match the following two types of
# dependencies and so they will not be included in the output from this module:
# 1. The dynamic linker:
#      /lib64/ld-linux-x86-64.so.2 (0x00007f6f7ab79000)
# 2. Linux virtual dynamic shared objects:
#      linux-vdso.so.1 (0x00007ffc06cfb000)
# Neither of those lines contains ' => ', which the pattern requires.
LDD_RE = re.compile(r'^\s+.+? => (\S+) \(0x.+\)')
class DependencyExtractor(object):
    """
    This class extracts native library dependencies from the given executable.

    Dependencies are discovered by running 'ldd' and parsing its output with
    the module-level LDD_RE pattern. The raw 'ldd' result for each executable
    is cached in 'deps_cache' so that repeated queries do not re-run the tool.
    """

    # File name suffixes that are never passed to 'ldd'.
    _NON_BINARY_SUFFIXES = (".jar", ".pl", ".py", ".sh", ".txt")

    def __init__(self):
        # Maps an executable path to the (stdout, stderr, returncode) tuple
        # produced by running 'ldd' on it.
        self.deps_cache = {}
        # Predicate deciding whether a library path is reported; accepts
        # everything by default.
        self.lib_allowed_filter = lambda path: True
        self.enable_expand_symlinks = False

    def set_library_filter(self, lib_allowed_filter):
        """
        Specify a filter predicate that should return True iff the specified
        library path should be included in the result from extract_deps().
        By default, all libraries are included in the result.
        """
        self.lib_allowed_filter = lib_allowed_filter

    def set_expand_symlinks(self, expand):
        """
        Specify whether symlinks should be expanded in the output from
        extract_deps(). By default, symlinks are not expanded. See
        expand_symlinks().
        """
        self.enable_expand_symlinks = expand

    def expand_symlinks(self, deps):
        """
        ldd will often point to symlinks. Return a list including any symlink
        in the specified dependency list as well as whatever it's pointing to,
        recursively.

        Each input path is emitted first, followed by every link target
        reached while following the chain. A chain that loops back onto a
        path already visited for the same dependency is cut off at that point
        instead of being followed again.
        """
        expanded = []
        for path in deps:
            expanded.append(path)
            visited = {path}
            while os.path.islink(path):
                # os.readlink() may return an absolute target; os.path.join()
                # discards the directory prefix in that case, so both
                # relative and absolute link targets resolve correctly here.
                path = os.path.join(os.path.dirname(path), os.readlink(path))
                if path in visited:
                    # Symlink cycle: stop rather than revisit the same links.
                    break
                visited.add(path)
                expanded.append(path)
        return expanded

    def extract_deps(self, exe):
        """
        Runs 'ldd' on the provided 'exe' path, returning a list of
        any libraries it depends on. Libraries rejected by the library
        filter (see set_library_filter()) are removed from this list.

        If the provided 'exe' is not a binary executable, returns
        an empty list. An empty list is also returned, with a warning
        logged, when 'ldd' exits with a non-zero status.

        Raises OSError if the 'ldd' tool itself cannot be executed.
        """
        # NOTE(review): the last operand of this condition was lost in the
        # copy of the file under review; a directory check is assumed here.
        # Confirm against the upstream file.
        if exe.endswith(self._NON_BINARY_SUFFIXES) or os.path.isdir(exe):
            return []

        if exe not in self.deps_cache:
            # universal_newlines=True makes stdout text on both Python 2 and
            # Python 3, so the str pattern LDD_RE can be matched against it.
            p = subprocess.Popen(["ldd", exe], stdout=subprocess.PIPE,
                                 universal_newlines=True)
            out, err = p.communicate()
            self.deps_cache[exe] = (out, err, p.returncode)

        out, err, rc = self.deps_cache[exe]
        if rc != 0:
            logging.warning("failed to run ldd on %s", exe)
            return []

        deps = []
        for line in out.splitlines():
            match = LDD_RE.match(line)
            if not match:
                continue
            dep = match.group(1)
            # Apply the provided predicate.
            if not self.lib_allowed_filter(dep):
                continue
            deps.append(dep)

        if self.enable_expand_symlinks:
            deps = self.expand_symlinks(deps)
        return deps