# blob: 07a1b8d5fce3af99d2ff64f14905b9b8885a6ee8 [file] [log] [blame]
#
# Copyright 2018 Bloomberg Finance LP
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
# Jonathan Maw <jonathan.maw@codethink.co.uk>
import os
import pickle
import hashlib
import io
import sys
from contextlib import contextmanager
from collections import namedtuple
from ._context import Context
from . import _yaml
# Basename of the file holding the pickled YamlCache; it is placed under a
# ".bst" directory by YamlCache.get_cache_file()
YAML_CACHE_FILENAME = "yaml_cache.pickle"
# YamlCache()
#
# A cache that wraps around the loading of yaml in projects.
#
# The recommended way to use a YamlCache is:
#   with YamlCache.open(context, cachefile) as yamlcache:
#       # Load all the yaml
#       ...
#
# Args:
#    context (Context): The invocation Context
#
class YamlCache():

    def __init__(self, context):
        # Maps a project name (see get_project_name()) to its CachedProject
        self._project_caches = {}
        self._context = context

    ##################
    # Public Methods #
    ##################

    # is_cached():
    #
    # Checks whether a file is cached.
    #
    # Args:
    #    project (Project) or None: The project this file is in.
    #    filepath (str): The path to the file. When a project is given, the
    #                    path must be located inside the project's directory.
    #
    # Returns:
    #    (bool): Whether the file is cached.
    def is_cached(self, project, filepath):
        cache_path = self._get_filepath(project, filepath)
        project_name = self.get_project_name(project)

        project_cache = self._project_caches.get(project_name)
        return project_cache is not None and cache_path in project_cache.elements

    # open():
    #
    # Return an instance of the YamlCache which writes to disk when it leaves scope.
    #
    # The cache is only written back when the body of the `with` statement
    # completes without raising.
    #
    # Args:
    #    context (Context): The context.
    #    cachefile (str): The path to the cache file.
    #
    # Returns:
    #    (YamlCache): A YamlCache.
    @staticmethod
    @contextmanager
    def open(context, cachefile):
        # Try to load from disk first
        #
        # NOTE: The cache file is a pickle and is trusted as such; unpickling
        #       data from an untrusted source can execute arbitrary code.
        cache = None
        if os.path.exists(cachefile):
            try:
                with open(cachefile, "rb") as f:
                    cache = BstUnpickler(f, context).load()
            except EOFError:
                # The file was empty
                pass
            except pickle.UnpicklingError as e:
                sys.stderr.write("Failed to load YamlCache, {}\n".format(e))

        # Failed to load from disk, create a new one
        if not cache:
            cache = YamlCache(context)

        yield cache

        cache._write(cachefile)

    # get_cache_file():
    #
    # Retrieves a path to the yaml cache file.
    #
    # Args:
    #    top_dir (str): The directory containing the ".bst" directory
    #
    # Returns:
    #    (str): The path to the cache file
    @staticmethod
    def get_cache_file(top_dir):
        return os.path.join(top_dir, ".bst", YAML_CACHE_FILENAME)

    # get():
    #
    # Gets a parsed file from the cache.
    #
    # Args:
    #    project (Project) or None: The project this file is in, if it exists.
    #    filepath (str): The absolute path to the file.
    #    copy_tree (bool): Whether the data should make a copy when it's being generated
    #                      (i.e. exactly as when called in yaml)
    #
    # Returns:
    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
    #    (str): The key used to look up the parsed yaml in the cache
    def get(self, project, filepath, copy_tree):
        key = self._calculate_key(project, filepath, copy_tree)
        data = self._get(project, filepath, key)
        return data, key

    # put():
    #
    # Puts a parsed file into the cache.
    #
    # Args:
    #    project (Project): The project this file is in.
    #    filepath (str): The path to the file.
    #    copy_tree (bool): Whether the data should make a copy when it's being generated
    #                      (i.e. exactly as when called in yaml)
    #    value (decorated dict): The data to put into the cache.
    def put(self, project, filepath, copy_tree, value):
        key = self._calculate_key(project, filepath, copy_tree)
        self.put_from_key(project, filepath, key, value)

    # put_from_key():
    #
    # Put a parsed file into the cache when given a key.
    #
    # Args:
    #    project (Project): The project this file is in.
    #    filepath (str): The path to the file.
    #    key (str): The key to the file within the cache. Typically, this is the
    #               key returned alongside the data by `get()`.
    #    value (decorated dict): The data to put into the cache.
    def put_from_key(self, project, filepath, key, value):
        cache_path = self._get_filepath(project, filepath)
        project_name = self.get_project_name(project)

        project_cache = self._project_caches.get(project_name)
        if project_cache is None:
            project_cache = self._project_caches[project_name] = CachedProject({})

        project_cache.elements[cache_path] = CachedYaml(key, value)

    ###################
    # Private Methods #
    ###################

    # _write():
    #
    # Writes the yaml cache to the specified path, creating the parent
    # directory if it does not exist yet.
    #
    # Args:
    #    path (str): The path to the cache file.
    def _write(self, path):
        parent_dir = os.path.dirname(path)
        os.makedirs(parent_dir, exist_ok=True)
        with open(path, "wb") as f:
            BstPickler(f).dump(self)

    # _get_filepath():
    #
    # Returns a file path relative to a project if passed, or the original path if
    # the project is None
    #
    # Args:
    #    project (Project) or None: The project the filepath exists within
    #    full_path (str): The path that the returned path is based on
    #
    # Returns:
    #    (str): The path to the file, relative to a project if it exists
    def _get_filepath(self, project, full_path):
        if project:
            assert full_path.startswith(project.directory)
            filepath = os.path.relpath(full_path, project.directory)
        else:
            filepath = full_path
        return filepath

    # _calculate_key():
    #
    # Calculates a key for putting into the cache.
    #
    # Args:
    #    project (Project) or None: The project this file is in.
    #    filepath (str): The path to the file.
    #    copy_tree (bool): Whether the data should make a copy when it's being generated
    #                      (i.e. exactly as when called in yaml)
    #
    # Returns:
    #    (str): A hex digest derived from the file's content key and copy_tree
    @staticmethod
    def _calculate_key(project, filepath, copy_tree):
        if project and project.junction:
            # files in a junction only change if the junction element changes
            # NOTE: This may change when junction workspaces are revisited/fixed
            content_key = project.junction._get_cache_key()
        else:
            stat = os.stat(filepath)
            content_key = stat.st_mtime

        # Hash both values together. copy_tree must be part of the pickled
        # data: passed positionally as the second argument of pickle.dumps()
        # it would be interpreted as the pickle protocol instead.
        string = pickle.dumps((content_key, copy_tree))
        return hashlib.sha1(string).hexdigest()

    # _get():
    #
    # Gets a parsed file from the cache when given a key.
    #
    # Args:
    #    project (Project): The project this file is in.
    #    filepath (str): The path to the file.
    #    key (str): The key to the file within the cache. Typically, this is the
    #               value of `_calculate_key()` for the file.
    #
    # Returns:
    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
    def _get(self, project, filepath, key):
        cache_path = self._get_filepath(project, filepath)
        project_name = self.get_project_name(project)

        project_cache = self._project_caches.get(project_name)
        if project_cache is None:
            return None

        cachedyaml = project_cache.elements.get(cache_path)
        if cachedyaml is None or cachedyaml._key != key:
            # Either never cached, or cached under a different (stale) key
            return None

        # We've unpickled the YamlCache, but not the specific file
        if cachedyaml._contents is None:
            cachedyaml._contents = BstUnpickler.loads(cachedyaml._pickled_contents, self._context)
        return cachedyaml._contents

    # get_project_name():
    #
    # Gets a name appropriate for Project. Projects must use their junction's
    # name if present, otherwise elements with the same contents under the
    # same path with identically-named projects are considered the same yaml
    # object, despite existing in different Projects.
    #
    # Args:
    #    project (Project): The project this file is in, or None.
    #
    # Returns:
    #    (str): The project's junction's name if present, the project's name,
    #           or an empty string if there is no project
    @staticmethod
    def get_project_name(project):
        if not project:
            return ""
        if project.junction:
            return project.junction.name
        return project.name
# CachedProject()
#
# Container for the cached yaml belonging to a single project.
#
# Args:
#    elements (dict): The cached entries of the project, indexed by file path
#
CachedProject = namedtuple('CachedProject', 'elements')
# CachedYaml()
#
# A single cached file: the key it was stored under, its parsed contents
# and a pickled copy of those contents.
#
# Args:
#    key (str): The key the contents are cached under
#    contents (provenanced dict): The contents to put in the cache.
#
class CachedYaml():

    def __init__(self, key, contents):
        self._key = key
        self.set_contents(contents)

    # set_contents():
    #
    # Stores the contents together with their pickled representation.
    #
    # Args:
    #    contents (provenanced dict): The contents to put in the cache.
    #
    def set_contents(self, contents):
        self._contents = contents
        self._pickled_contents = BstPickler.dumps(contents)

    # __getstate__():
    #
    # Pickling hook: the live contents are blanked out in the returned state,
    # so that only the key and the pickled contents get serialised.
    #
    # Returns:
    #    (dict): A copy of the instance attributes with '_contents' set to None
    #
    def __getstate__(self):
        state = dict(vars(self))
        state['_contents'] = None
        return state
# BstPickler()
#
# A Pickler which replaces objects that cannot be stored as they are with
# persistent ids.
#
# In _yaml.load, we have a ProvenanceFile that stores the project the file
# came from. Projects can't be pickled, but it's always going to be the same
# project between invocations (unless the entire project is moved but the
# file stayed in the same place)
#
class BstPickler(pickle.Pickler):

    # persistent_id():
    #
    # Args:
    #    obj (object): The object about to be pickled
    #
    # Returns:
    #    (tuple): The persistent id for a ProvenanceFile or a Context,
    #             or None when the object should be pickled normally
    #
    def persistent_id(self, obj):
        if isinstance(obj, _yaml.ProvenanceFile):
            owner = obj.project
            if not owner:
                # No project, the filename is stored untouched
                return ("ProvenanceFile", obj.name, obj.shortname, None)

            # ProvenanceFile's project object cannot be stored as it is.
            tag = YamlCache.get_project_name(owner)

            # ProvenanceFile's filename must be stored relative to the
            # project, as the project dir may move.
            relative_name = os.path.relpath(obj.name, owner.directory)
            return ("ProvenanceFile", relative_name, obj.shortname, tag)

        if isinstance(obj, Context):
            return ("Context",)

        return None

    # dumps():
    #
    # Pickles an object with a BstPickler into memory.
    #
    # Args:
    #    obj (object): The object to pickle
    #
    # Returns:
    #    (bytes): The pickled object
    #
    @staticmethod
    def dumps(obj):
        buffer = io.BytesIO()
        BstPickler(buffer).dump(obj)
        return buffer.getvalue()
# BstUnpickler()
#
# An Unpickler which resolves the persistent ids of ProvenanceFile and
# Context objects against the given Context.
#
# Args:
#    file (file object): The binary stream to unpickle from
#    context (Context): The context used to resolve persistent ids
#
class BstUnpickler(pickle.Unpickler):

    def __init__(self, file, context):
        super().__init__(file)
        self._context = context

    # persistent_load():
    #
    # Args:
    #    pid (tuple): A persistent id, either ("Context",) or
    #                 ("ProvenanceFile", name, shortname, project_tag)
    #
    # Returns:
    #    (ProvenanceFile or Context): The object the persistent id refers to
    #
    # Raises:
    #    pickle.UnpicklingError: If the persistent id is not supported, or if
    #                            it refers to a project the context doesn't know
    #
    def persistent_load(self, pid):
        if pid[0] == "ProvenanceFile":
            _, tagged_name, shortname, project_tag = pid

            if project_tag is not None:
                projects = list(self._context.get_projects())

                # Search with an explicit default, so that a missing project
                # is reported below rather than leaving `project` unbound.
                project = next(
                    (p for p in projects if YamlCache.get_project_name(p) == project_tag),
                    None
                )
                if project is None:
                    known = [YamlCache.get_project_name(p) for p in projects]
                    raise pickle.UnpicklingError("No project with name {} found in {}"
                                                 .format(project_tag, known))

                # The name was stored relative to the project directory
                name = os.path.join(project.directory, tagged_name)
            else:
                project = None
                name = tagged_name

            return _yaml.ProvenanceFile(name, shortname, project)
        elif pid[0] == "Context":
            return self._context
        else:
            raise pickle.UnpicklingError("Unsupported persistent object, {}".format(pid))

    # loads():
    #
    # Unpickles an object with a BstUnpickler from memory.
    #
    # Args:
    #    text (bytes): The pickled data
    #    context (Context): The context used to resolve persistent ids
    #
    # Returns:
    #    (object): The unpickled object
    #
    @staticmethod
    def loads(text, context):
        stream = io.BytesIO(bytes(text))
        return BstUnpickler(stream, context).load()