blob: a830df8d310728eb4b21793c0bbcd78c4f889a27 [file] [log] [blame]
#
# Copyright (C) 2017 Codethink Limited
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
# Jonathan Maw <jonathan.maw@codethink.co.uk>
"""
tar - stage files from tar archives
===================================

**Host dependencies:**

  * lzip (for .tar.lz files)

**Usage:**

.. code:: yaml

   # Specify the tar source kind
   kind: tar

   # Specify the tar url. Using an alias defined in your project
   # configuration is encouraged. 'bst track' will update the
   # sha256sum in 'ref' to the downloaded file's sha256sum.
   url: upstream:foo.tar

   # Specify the ref. It's a sha256sum of the file you download.
   ref: 6c9f6f68a131ec6381da82f2bff978083ed7f4f7991d931bfa767b7965ebc94b

   # Specify a glob pattern to indicate the base directory to extract
   # from the tarball. The first matching directory will be used.
   #
   # Note that this is '*' by default since most standard release
   # tarballs contain a self named subdirectory at the root which
   # contains the files one normally wants to extract to build.
   #
   # To extract the root of the tarball directly, this can be set
   # to an empty string.
   base-dir: '*'

See :ref:`built-in functionality documentation <core_source_builtins>` for
details on common configuration options for sources.
"""
import os
import tarfile
from contextlib import contextmanager
from tempfile import TemporaryFile
from buildstream import SourceError
from buildstream import utils
from ._downloadablefilesource import DownloadableFileSource
class TarSource(DownloadableFileSource):
    """Source which stages the content of a tar archive.

    The archive is the file returned by ``self._get_mirror_file()``. When
    the configured url ends with ``.lz`` the archive is first decompressed
    with the host ``lzip`` tool. The optional ``base-dir`` glob selects a
    directory inside the archive whose content is staged instead of the
    archive root.
    """
    # pylint: disable=attribute-defined-outside-init

    def configure(self, node):
        """Load the ``base-dir`` option in addition to the inherited ones.

        Args:
            node: The configuration node handed to the base class.
        """
        super().configure(node)

        # An empty 'base-dir' string is stored as None, stage() then
        # extracts the archive root as is.
        self.base_dir = self.node_get_member(node, str, 'base-dir', '*') or None

        self.node_validate(node, DownloadableFileSource.COMMON_CONFIG_KEYS + ['base-dir'])

        self.keyorder += ['base-dir']

    def preflight(self):
        """Look up the host ``lzip`` tool when the url ends with ``.lz``."""
        self.host_lzip = None
        if self.url.endswith('.lz'):
            self.host_lzip = utils.get_host_tool('lzip')

    def get_unique_key(self):
        """Return the inherited unique key with ``base_dir`` appended."""
        return super().get_unique_key() + [self.base_dir]

    @contextmanager
    def _run_lzip(self):
        """Yield a temporary file holding the lzip-decompressed archive.

        The temporary file is rewound to its start before being yielded
        and is closed when the context exits.
        """
        assert self.host_lzip
        with TemporaryFile() as lzip_stdout:
            # The archive is compressed binary data, so open it in binary
            # mode; only its file descriptor is handed to the subprocess.
            with open(self._get_mirror_file(), 'rb') as lzip_file:
                # NOTE(review): the result of self.call() is not inspected
                # here; presumably a failed decompression surfaces later as
                # a tarfile error in stage() -- confirm.
                self.call([self.host_lzip, '-d'],
                          stdin=lzip_file,
                          stdout=lzip_stdout)

            lzip_stdout.seek(0, 0)
            yield lzip_stdout

    @contextmanager
    def _get_tar(self):
        """Yield an open ``tarfile.TarFile`` for the mirrored archive."""
        if self.url.endswith('.lz'):
            with self._run_lzip() as lzip_dec:
                # The stream was already decompressed by lzip, so force
                # tarfile to read it as a plain uncompressed tar.
                with tarfile.open(fileobj=lzip_dec, mode='r:') as tar:
                    yield tar
        else:
            with tarfile.open(self._get_mirror_file()) as tar:
                yield tar

    def stage(self, directory):
        """Extract the archive, or its selected base directory, into *directory*.

        Args:
            directory (str): Path to extract into.

        Raises:
            SourceError: If reading or extracting the archive fails, or if
                         no path in the archive matches ``base-dir``.
        """
        try:
            with self._get_tar() as tar:
                base_dir = None
                if self.base_dir:
                    base_dir = self._find_base_dir(tar, self.base_dir)

                if base_dir:
                    tar.extractall(path=directory, members=self._extract_members(tar, base_dir))
                else:
                    tar.extractall(path=directory)

        except (tarfile.TarError, OSError) as e:
            raise SourceError("{}: Error staging source: {}".format(self, e)) from e

    @staticmethod
    def _strip_dot_slash(path):
        """Return *path* without a single leading ``./`` prefix.

        Unlike ``str.lstrip('./')`` this only removes the literal prefix, so
        names such as ``.hidden/file`` and ``.`` are returned unchanged.
        """
        if path.startswith('./'):
            return path[2:]
        return path

    # Override and translate which filenames to extract
    def _extract_members(self, tar, base_dir):
        """Yield the members located under *base_dir*, re-rooted at it.

        The yielded ``TarInfo`` objects are the archive's own members,
        modified in place: a leading ``./`` is removed from their path and
        the *base_dir* prefix is stripped.

        Args:
            tar (tarfile.TarFile): The open archive.
            base_dir (str): Directory inside the archive to extract from.
        """
        if not base_dir.endswith(os.sep):
            base_dir = base_dir + os.sep

        prefix_len = len(base_dir)
        for member in tar.getmembers():

            # First, ensure that a member never starts with `./`
            member.path = self._strip_dot_slash(member.path)

            # Now extract only the paths which match the normalized path
            if not member.path.startswith(base_dir):
                continue

            # If it's a hard link, give the link name the same treatment,
            # we need the link targets to match up with what we are staging.
            #
            # Only link names which really point inside of the chosen base
            # directory are rewritten; blindly slicing any other link name
            # would corrupt it, so those are left untouched.
            if member.type == tarfile.LNKTYPE:
                linkname = self._strip_dot_slash(member.linkname)
                if linkname.startswith(base_dir):
                    member.linkname = linkname[prefix_len:]

            member.path = member.path[prefix_len:]
            yield member

    # We want to iterate over all paths of a tarball, but getmembers()
    # is not enough because some tarballs simply do not contain the leading
    # directory paths for the archived files.
    def _list_tar_paths(self, tar):
        """Yield candidate directory paths found in *tar*.

        Directory members are yielded by name; parent directories of
        non-directory members are yielded too when the archive holds no
        member of that name. Non-directory members themselves are never
        yielded. All names are reported without a leading ``./``.

        Args:
            tar (tarfile.TarFile): The open archive.
        """
        members = tar.getmembers()

        # Normalized names of everything present in the archive, computed
        # once so that checking for an existing member does not require
        # scanning the archive for every path component.
        known = {self._strip_dot_slash(member.name) for member in members}

        visited = set()
        for member in members:

            # Remove any possible leading './', offer more consistent behavior
            # across tarballs encoded with or without a leading '.'
            member_name = self._strip_dot_slash(member.name)

            if not member.isdir():

                # Loop over the components of a path, for a path of a/b/c/d
                # we will first visit 'a', then 'a/b' and then 'a/b/c', excluding
                # the final component
                components = member_name.split('/')
                for depth in range(1, len(components)):
                    dir_component = '/'.join(components[:depth])
                    if dir_component in visited:
                        continue
                    visited.add(dir_component)

                    # Don't yield directory members which actually do
                    # exist in the archive, those are yielded on their own
                    if dir_component not in known and dir_component != '.':
                        yield dir_component

                continue

            # Avoid considering the '.' directory, if any is included in the archive
            # this is to avoid the default 'base-dir: *' value behaving differently
            # depending on whether the tarball was encoded with a leading '.' or not
            elif member_name == '.':
                continue

            yield member_name

    def _find_base_dir(self, tar, pattern):
        """Return the first path, in sorted order, matching *pattern*.

        Args:
            tar (tarfile.TarFile): The open archive.
            pattern (str): The glob pattern passed to ``utils.glob()``.

        Raises:
            SourceError: If no path of the archive matches *pattern*.
        """
        paths = self._list_tar_paths(tar)
        matches = sorted(utils.glob(paths, pattern))
        if not matches:
            raise SourceError("{}: Could not find base directory matching pattern: {}".format(self, pattern))

        return matches[0]
def setup():
    """Return the source class implemented by this module.

    NOTE(review): presumably this is the hook the plugin loader calls to
    discover the plugin type -- confirm against the loader.
    """
    source_type = TarSource
    return source_type