yamlcache: Look up files in the cache without reading the file

For files that are not part of a junction (i.e. not checked out into a
temporary directory), use the mtime to see if the file has changed.

For files that are part of a junction, use the junction element's cache key.
diff --git a/buildstream/_yaml.py b/buildstream/_yaml.py
index 7e12183..f1d16e2 100644
--- a/buildstream/_yaml.py
+++ b/buildstream/_yaml.py
@@ -197,12 +197,12 @@
 
     try:
         data = None
-        with open(filename) as f:
-            contents = f.read()
-        if yaml_cache:
-            data, key = yaml_cache.get(project, filename, contents, copy_tree)
 
+        if yaml_cache:
+            data, key = yaml_cache.get(project, filename, copy_tree)
         if not data:
+            with open(filename) as f:
+                contents = f.read()
             data = load_data(contents, file, copy_tree=copy_tree)
             if yaml_cache:
                 yaml_cache.put_from_key(project, filename, key, data)
diff --git a/buildstream/_yamlcache.py b/buildstream/_yamlcache.py
index 8911700..07a1b8d 100644
--- a/buildstream/_yamlcache.py
+++ b/buildstream/_yamlcache.py
@@ -127,15 +127,14 @@
     # Args:
     #    project (Project) or None: The project this file is in, if it exists.
     #    filepath (str): The absolute path to the file.
-    #    contents (str): The contents of the file to be cached
     #    copy_tree (bool): Whether the data should make a copy when it's being generated
     #                      (i.e. exactly as when called in yaml)
     #
     # Returns:
     #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
     #    (str):            The key used to look up the parsed yaml in the cache
-    def get(self, project, filepath, contents, copy_tree):
-        key = self._calculate_key(contents, copy_tree)
+    def get(self, project, filepath, copy_tree):
+        key = self._calculate_key(project, filepath, copy_tree)
         data = self._get(project, filepath, key)
         return data, key
 
@@ -146,12 +145,11 @@
     # Args:
     #    project (Project): The project this file is in.
     #    filepath (str): The path to the file.
-    #    contents (str): The contents of the file that has been cached
     #    copy_tree (bool): Whether the data should make a copy when it's being generated
     #                      (i.e. exactly as when called in yaml)
     #    value (decorated dict): The data to put into the cache.
-    def put(self, project, filepath, contents, copy_tree, value):
-        key = self._calculate_key(contents, copy_tree)
+    def put(self, project, filepath, copy_tree, value):
+        key = self._calculate_key(project, filepath, copy_tree)
         self.put_from_key(project, filepath, key, value)
 
     # put_from_key():
@@ -213,13 +211,25 @@
     # Calculates a key for putting into the cache.
     #
     # Args:
-    #    (basic object)... : Any number of strictly-ordered basic objects
+    #    project (Project) or None: The project this file is in.
+    #    filepath (str): The path to the file.
+    #    copy_tree (bool): Whether the data should make a copy when it's being generated
+    #                      (i.e. exactly as when called in yaml)
     #
     # Returns:
-    #   (str): A key made out of every arg passed in
+    #   (str): A key derived from the file's content key and copy_tree
     @staticmethod
-    def _calculate_key(*args):
-        string = pickle.dumps(args)
+    def _calculate_key(project, filepath, copy_tree):
+        if project and project.junction:
+            # files in a junction only change if the junction element changes
+            # NOTE: This may change when junction workspaces are revisited/fixed
+            content_key = project.junction._get_cache_key()
+        else:
+            stat = os.stat(filepath)
+            content_key = stat.st_mtime
+        # Pickle both values together as one tuple: the second positional
+        # argument of pickle.dumps() is the protocol, not more data to serialize
+        string = pickle.dumps((content_key, copy_tree))
         return hashlib.sha1(string).hexdigest()
 
     # _get():
diff --git a/tests/frontend/yamlcache.py b/tests/frontend/yamlcache.py
index 99b5d71..5dc52d1 100644
--- a/tests/frontend/yamlcache.py
+++ b/tests/frontend/yamlcache.py
@@ -14,10 +14,10 @@
 
 
 def generate_project(tmpdir, ref_storage, with_junction, name="test"):
-    if with_junction == 'junction':
+    if with_junction:
         subproject_dir = generate_project(
             tmpdir, ref_storage,
-            'no-junction', name='test-subproject'
+            False, name='test-subproject'
         )
 
     project_dir = os.path.join(tmpdir, name)
@@ -33,7 +33,7 @@
     _yaml.dump(project_conf, project_conf_path)
 
     # elements
-    if with_junction == 'junction':
+    if with_junction:
         junction_name = 'junction.bst'
         junction_dir = os.path.join(project_dir, elements_path)
         junction_path = os.path.join(project_dir, elements_path, junction_name)
@@ -58,12 +58,6 @@
         yield yamlcache, project
 
 
-def yamlcache_key(yamlcache, in_file, copy_tree=False):
-    with open(in_file) as f:
-        key = yamlcache._calculate_key(f.read(), copy_tree)
-    return key
-
-
 def modified_file(input_file, tmpdir):
     with open(input_file) as f:
         data = f.read()
@@ -77,12 +71,13 @@
 
 
 @pytest.mark.parametrize('ref_storage', ['inline', 'project.refs'])
-@pytest.mark.parametrize('with_junction', ['no-junction', 'junction'])
-@pytest.mark.parametrize('move_project', ['move', 'no-move'])
-def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project):
+@pytest.mark.parametrize('with_junction', [True, False], ids=['junction', 'no-junction'])
+def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction):
     # Generate the project
     project = generate_project(str(tmpdir), ref_storage, with_junction)
-    if with_junction == 'junction':
+    element_path = os.path.join(project, 'elements', 'test.bst')
+    element_mtime = 0
+    if with_junction:
         result = cli.run(project=project, args=['source', 'fetch', '--track', 'junction.bst'])
         result.assert_success()
 
@@ -90,17 +85,14 @@
     result = cli.run(project=project, args=['show', 'test.bst'])
     result.assert_success()
 
-    element_path = os.path.join(project, 'elements', 'test.bst')
     with with_yamlcache(project) as (yc, prj):
         # Check that it's in the cache
         assert yc.is_cached(prj, element_path)
 
-        # *Absolutely* horrible cache corruption to check it's being used
-        # Modifying the data from the cache is fraught with danger,
-        # so instead I'll load a modified version of the original file
+        # Modify files in the yaml cache to test whether it's being used
         temppath = modified_file(element_path, str(tmpdir))
         contents = _yaml.load(temppath, copy_tree=False, project=prj)
-        key = yamlcache_key(yc, element_path)
+        key = yc._calculate_key(prj, element_path, copy_tree=False)
         yc.put_from_key(prj, element_path, key, contents)
 
     # Show that a variable has been added
@@ -112,13 +104,13 @@
 
 
 @pytest.mark.parametrize('ref_storage', ['inline', 'project.refs'])
-@pytest.mark.parametrize('with_junction', ['junction', 'no-junction'])
+@pytest.mark.parametrize('with_junction', [True, False], ids=['junction', 'no-junction'])
 def test_yamlcache_changed_file(cli, tmpdir, ref_storage, with_junction):
     # i.e. a file is cached, the file is changed, loading the file (with cache) returns new data
     # inline and junction can only be changed by opening a workspace
     # Generate the project
     project = generate_project(str(tmpdir), ref_storage, with_junction)
-    if with_junction == 'junction':
+    if with_junction:
         result = cli.run(project=project, args=['source', 'fetch', '--track', 'junction.bst'])
         result.assert_success()