#
# Copyright (C) 2017-2018 Codethink Limited
# Copyright (C) 2019-2020 Bloomberg Finance LP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Authors:
# Tristan Maat <tristan.maat@codethink.co.uk>

import os

from ._assetcache import AssetCache
from ._cas.casremote import BlobNotFound
from ._exceptions import ArtifactError, AssetCacheError, CASError, CASRemoteError
from ._protos.buildstream.v2 import artifact_pb2
from . import utils


REMOTE_ASSET_ARTIFACT_URN_TEMPLATE = "urn:fdc:buildstream.build:2020:artifact:{}"
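# This is the Remote Asset API URI under which artifact protos are pushed and
# fetched. It is formatted with the artifact name generated by
# Element.get_artifact_name() (roughly "<project>/<element>/<cache key>"; the
# exact ref format is defined by Element.get_artifact_name(), not here).

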
# An ArtifactCache manages artifacts.
#
# Args:
#     context (Context): The BuildStream context
#
class ArtifactCache(AssetCache):
    def __init__(self, context):
        super().__init__(context)

        # create artifact directory
        self._basedir = context.artifactdir
        os.makedirs(self._basedir, exist_ok=True)
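
    # Artifact protos are stored as individual files under self._basedir,
    # one file per artifact ref (see contains() and _pull_artifact_storage()).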

    # preflight():
    #
    # Preflight check.
    #
    def preflight(self):
        self.cas.preflight()

    # contains():
    #
    # Check whether the artifact for the specified Element is already available
    # in the local artifact cache.
    #
    # Args:
    #     element (Element): The Element to check
    #     key (str): The cache key to use
    #
    # Returns: True if the artifact is in the cache, False otherwise
    #
    def contains(self, element, key):
        ref = element.get_artifact_name(key)

        return os.path.exists(os.path.join(self._basedir, ref))

    # list_artifacts():
    #
    # List artifacts in this cache in LRU order.
    #
    # Args:
    #     glob (str): An optional glob expression to be used to list artifacts satisfying the glob
    #
    # Returns:
    #     ([str]) - A list of artifact names as generated in LRU order
    #
    def list_artifacts(self, *, glob=None):
        return [ref for _, ref in sorted(list(self.list_refs_mtimes(self._basedir, glob_expr=glob)))]

    # remove():
    #
    # Removes the artifact for the specified ref from the local
    # artifact cache.
    #
    # Args:
    #     ref (artifact_name): The name of the artifact to remove (as
    #                          generated by `Element.get_artifact_name`)
    #
    def remove(self, ref):
        try:
            self.remove_ref(ref)
        except AssetCacheError as e:
            raise ArtifactError("{}".format(e)) from e

    # push():
    #
    # Push committed artifact to remote repository.
    #
    # Args:
    #     element (Element): The Element whose artifact is to be pushed
    #     artifact (Artifact): The artifact being pushed
    #
    # Returns:
    #     (bool): True if any remote was updated, False if no pushes were required
    #
    # Raises:
    #     (ArtifactError): if there was an error
    #
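    # Note on ordering: file data is pushed to all storage remotes first, so
    # that they can perform file checks on their end, and only then is the
    # artifact proto pushed to the index remotes (see the two loops below).
    #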
    def push(self, element, artifact):
        project = element._get_project()
        display_key = element._get_display_key()

        index_remotes, storage_remotes = self.get_remotes(project.name, True)

        artifact_proto = artifact._get_proto()
        artifact_digest = self.cas.add_object(buffer=artifact_proto.SerializeToString())

        pushed = False

        # First push our files to all storage remotes, so that they
        # can perform file checks on their end
        for remote in storage_remotes:
            remote.init()
            element.status("Pushing data from artifact {} -> {}".format(display_key.brief, remote))

            if self._push_artifact_blobs(artifact, artifact_digest, remote):
                element.info("Pushed data from artifact {} -> {}".format(display_key.brief, remote))
            else:
                element.info(
                    "Remote ({}) already has all data of artifact {} cached".format(remote, display_key.brief)
                )

        for remote in index_remotes:
            remote.init()
            element.status("Pushing artifact {} -> {}".format(display_key.brief, remote))

            if self._push_artifact_proto(element, artifact, artifact_digest, remote):
                element.info("Pushed artifact {} -> {}".format(display_key.brief, remote))
                pushed = True
            else:
                element.info("Remote ({}) already has artifact {} cached".format(remote, display_key.brief))

        return pushed

    # pull():
    #
    # Pull artifact from one of the configured remote repositories.
    #
    # Args:
    #     element (Element): The Element whose artifact is to be fetched
    #     key (str): The cache key to use
    #     pull_buildtrees (bool): Whether to pull buildtrees or not
    #
    # Returns:
    #     (bool): True if pull was successful, False if artifact was not available
    #
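    # Note on ordering: this mirrors push() in reverse -- the artifact proto is
    # first fetched from an index remote, so that we know which blobs to pull,
    # and the referenced blobs are then fetched from the storage remotes via
    # _pull_artifact_storage().
    #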
    def pull(self, element, key, *, pull_buildtrees=False):
        artifact_digest = None
        display_key = key[: self.context.log_key_length]
        project = element._get_project()

        artifact_name = element.get_artifact_name(key=key)
        uri = REMOTE_ASSET_ARTIFACT_URN_TEMPLATE.format(artifact_name)

        index_remotes, storage_remotes = self.get_remotes(project.name, False)

        errors = []
        # Start by pulling our artifact proto, so that we know which
        # blobs to pull
        for remote in index_remotes:
            remote.init()
            try:
                element.status("Pulling artifact {} <- {}".format(display_key, remote))
                response = remote.fetch_blob([uri])
                if response:
                    artifact_digest = response.blob_digest
                    break

                element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key))
            except AssetCacheError as e:
                element.warn("Could not pull from remote {}: {}".format(remote, e))
                errors.append(e)

        if errors and not artifact_digest:
            raise ArtifactError(
                "Failed to pull artifact {}".format(display_key),
                detail="\n".join(str(e) for e in errors),
                temporary=True,
            )

        # If no index remote has the artifact proto, there is nothing to pull
        if not artifact_digest:
            return False

        errors = []
        # Otherwise, pull the data the artifact proto refers to from the storage remotes
        for remote in storage_remotes:
            remote.init()
            try:
                element.status("Pulling data for artifact {} <- {}".format(display_key, remote))

                if self._pull_artifact_storage(element, key, artifact_digest, remote, pull_buildtrees=pull_buildtrees):
                    element.info("Pulled artifact {} <- {}".format(display_key, remote))
                    return True

                element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key))
            except BlobNotFound as e:
                # Not all blobs are available on this remote
                element.info("Remote cas ({}) does not have blob {} cached".format(remote, e.blob))
                continue
            except CASError as e:
                element.warn("Could not pull from remote {}: {}".format(remote, e))
                errors.append(e)

        if errors:
            raise ArtifactError(
                "Failed to pull artifact {}".format(display_key),
                detail="\n".join(str(e) for e in errors),
                temporary=True,
            )

        return False

    # link_key():
    #
    # Add a key for an existing artifact.
    #
    # Args:
    #     element (Element): The Element whose artifact is to be linked
    #     oldkey (str): An existing cache key for the artifact
    #     newkey (str): A new cache key for the artifact
    #
    def link_key(self, element, oldkey, newkey):
        oldref = element.get_artifact_name(oldkey)
        newref = element.get_artifact_name(newkey)

        if oldref == newref:
            # The two refs are identical, nothing to do
            return

        utils.safe_link(os.path.join(self._basedir, oldref), os.path.join(self._basedir, newref))

    # fetch_missing_blobs():
    #
    # Fetch missing blobs from configured remote repositories.
    #
    # Args:
    #     project (Project): The current project
    #     missing_blobs (list): The Digests of the blobs to fetch
    #
    def fetch_missing_blobs(self, project, missing_blobs):
        index_remotes, _ = self.get_remotes(project.name, False)

        for remote in index_remotes:
            if not missing_blobs:
                break

            remote.init()

            # fetch_blobs() will return the blobs that are still missing
            missing_blobs = self.cas.fetch_blobs(remote, missing_blobs, allow_partial=True)

        if missing_blobs:
            raise ArtifactError("Blobs not found on configured artifact servers")

    # find_missing_blobs():
    #
    # Find missing blobs from configured push remote repositories.
    #
    # Args:
    #     project (Project): The current project
    #     missing_blobs (list): The Digests of the blobs to check
    #
    # Returns:
    #     (list): The Digests of the blobs missing on at least one push remote
    #
    def find_missing_blobs(self, project, missing_blobs):
        if not missing_blobs:
            return []

        _, push_remotes = self.get_remotes(project.name, True)

        remote_missing_blobs_list = []

        for remote in push_remotes:
            remote.init()

            remote_missing_blobs = self.cas.missing_blobs(missing_blobs, remote=remote)

            for blob in remote_missing_blobs:
                if blob not in remote_missing_blobs_list:
                    remote_missing_blobs_list.append(blob)

        return remote_missing_blobs_list

    # check_remotes_for_element()
    #
    # Check if the element is available in any of the remotes
    #
    # Args:
    #     element (Element): The element to check
    #
    # Returns:
    #     (bool): True if the element is available remotely
    #
    def check_remotes_for_element(self, element):
        project = element._get_project()
        index_remotes, _ = self.get_remotes(project.name, False)

        # If there are no remotes
        if not index_remotes:
            return False

        ref = element.get_artifact_name()
        for remote in index_remotes:
            remote.init()
            if self._query_remote(ref, remote):
                return True

        return False

    ################################################
    #             Local Private Methods            #
    ################################################

    # _push_artifact_blobs()
    #
    # Push the blobs that make up an artifact to the remote server.
    #
    # Args:
    #     artifact (Artifact): The artifact whose blobs to push.
    #     artifact_digest (Digest): The digest of the serialized artifact proto.
    #     remote (CASRemote): The remote to push the blobs to.
    #
    # Returns:
    #     (bool) - True if we uploaded anything, False otherwise.
    #
    # Raises:
    #     ArtifactError: If we fail to push blobs (*unless* they're
    #                    already there or we run out of space on the server).
    #
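    # Note: the buildtree and buildroot directories may be absent from the
    # local cache (buildtrees are not always cached locally), in which case
    # the FileNotFoundError below is deliberately ignored and only the
    # remaining blobs are pushed.
    #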
    def _push_artifact_blobs(self, artifact, artifact_digest, remote):
        artifact_proto = artifact._get_proto()

        try:
            if str(artifact_proto.files):
                self.cas._send_directory(remote, artifact_proto.files)

            if str(artifact_proto.buildtree):
                try:
                    self.cas._send_directory(remote, artifact_proto.buildtree)
                except FileNotFoundError:
                    pass

            if str(artifact_proto.buildroot):
                try:
                    self.cas._send_directory(remote, artifact_proto.buildroot)
                except FileNotFoundError:
                    pass

            digests = [artifact_digest, artifact_proto.low_diversity_meta, artifact_proto.high_diversity_meta]
            if str(artifact_proto.public_data):
                digests.append(artifact_proto.public_data)

            for log_file in artifact_proto.logs:
                digests.append(log_file.digest)

            self.cas.send_blobs(remote, digests)
        except CASRemoteError as cas_error:
            if cas_error.reason != "cache-too-full":
                raise ArtifactError("Failed to push artifact blobs: {}".format(cas_error), temporary=True)
            return False

        return True

    # _push_artifact_proto()
    #
    # Pushes the artifact proto to remote.
    #
    # Args:
    #     element (Element): The element
    #     artifact (Artifact): The related artifact being pushed
    #     artifact_digest (Digest): The digest of the serialized artifact proto
    #     remote (AssetRemote): Remote to push to
    #
    # Returns:
    #     (bool): Whether we pushed the artifact.
    #
    # Raises:
    #     ArtifactError: If the push fails for any reason except the
    #                    artifact already existing.
    #
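    # Note: the push declares the blobs and directories the artifact proto
    # refers to (files, buildtree, sources, buildroot, metadata and logs) so
    # that the remote asset server can take those references into account;
    # the exact server-side behaviour depends on the remote.
    #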
    def _push_artifact_proto(self, element, artifact, artifact_digest, remote):
        artifact_proto = artifact._get_proto()

        keys = list(utils._deduplicate([artifact_proto.strong_key, artifact_proto.weak_key]))
        artifact_names = [element.get_artifact_name(key=key) for key in keys]
        uris = [REMOTE_ASSET_ARTIFACT_URN_TEMPLATE.format(artifact_name) for artifact_name in artifact_names]

        try:
            response = remote.fetch_blob(uris)
            # Skip push if artifact is already on the server
            if response and response.blob_digest == artifact_digest:
                return False
        except AssetCacheError as e:
            raise ArtifactError("{}".format(e), temporary=True) from e

        referenced_directories = []
        if artifact_proto.files:
            referenced_directories.append(artifact_proto.files)
        if artifact_proto.buildtree:
            referenced_directories.append(artifact_proto.buildtree)
        if artifact_proto.sources:
            referenced_directories.append(artifact_proto.sources)
        if artifact_proto.buildroot:
            referenced_directories.append(artifact_proto.buildroot)

        referenced_blobs = [artifact_proto.low_diversity_meta, artifact_proto.high_diversity_meta] + [
            log_file.digest for log_file in artifact_proto.logs
        ]

        try:
            remote.push_blob(
                uris,
                artifact_digest,
                references_blobs=referenced_blobs,
                references_directories=referenced_directories,
            )
        except AssetCacheError as e:
            raise ArtifactError("{}".format(e), temporary=True) from e

        return True

    # _pull_artifact_storage():
    #
    # Pull artifact blobs from the given remote.
    #
    # Args:
    #     element (Element): element to pull
    #     key (str): The specific key for the artifact to pull
    #     artifact_digest (Digest): The digest of the artifact proto to pull
    #     remote (CASRemote): remote to pull from
    #     pull_buildtrees (bool): whether to pull buildtrees or not
    #
    # Returns:
    #     (bool): True if we pulled any blobs.
    #
    # Raises:
    #     ArtifactError: If the pull failed for any reason except the
    #                    blobs not existing on the server.
    #
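    # Note: the artifact proto itself is fetched and written into the local
    # artifact cache first; the buildtree and buildroot directories it refers
    # to are only fetched when pull_buildtrees is set.
    #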
    def _pull_artifact_storage(self, element, key, artifact_digest, remote, pull_buildtrees=False):
        artifact_name = element.get_artifact_name(key=key)

        try:
            # Fetch and parse artifact proto
            self.cas.fetch_blobs(remote, [artifact_digest])
            artifact = artifact_pb2.Artifact()
            with self.cas.open(artifact_digest, "rb") as f:
                artifact.ParseFromString(f.read())

            # Write the artifact proto to cache
            artifact_path = os.path.join(self._basedir, artifact_name)
            os.makedirs(os.path.dirname(artifact_path), exist_ok=True)
            with utils.save_file_atomic(artifact_path, mode="wb") as f:
                f.write(artifact.SerializeToString())

            if str(artifact.files):
                self.cas._fetch_directory(remote, artifact.files)

            if pull_buildtrees:
                if str(artifact.buildtree):
                    self.cas._fetch_directory(remote, artifact.buildtree)
                if str(artifact.buildroot):
                    self.cas._fetch_directory(remote, artifact.buildroot)

            digests = [artifact.low_diversity_meta, artifact.high_diversity_meta]
            if str(artifact.public_data):
                digests.append(artifact.public_data)

            for log_digest in artifact.logs:
                digests.append(log_digest.digest)

            self.cas.fetch_blobs(remote, digests)
        except BlobNotFound:
            return False
        except CASRemoteError as e:
            raise ArtifactError("{}".format(e), temporary=True) from e

        return True

    # _query_remote()
    #
    # Check whether the given artifact ref is available on the remote.
    #
    # Args:
    #     ref (str): The artifact ref
    #     remote (AssetRemote): The remote we want to check
    #
    # Returns:
    #     (bool): True if the ref exists in the remote, False otherwise.
    #
    def _query_remote(self, ref, remote):
        uri = REMOTE_ASSET_ARTIFACT_URN_TEMPLATE.format(ref)
        try:
            response = remote.fetch_blob([uri])
            return bool(response)
        except AssetCacheError as e:
            raise ArtifactError("{}".format(e), temporary=True) from e