# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
from datetime import datetime, timedelta
from functools import wraps
from typing import Any, Callable, Dict, Optional, Union

from flask import current_app as app, request
from flask_caching import Cache
from flask_caching.backends import NullCache
from werkzeug.wrappers.etag import ETagResponseMixin

from superset import db
from superset.extensions import cache_manager
from superset.models.cache import CacheKey
from superset.stats_logger import BaseStatsLogger
from superset.utils.core import json_int_dttm_ser
from superset.utils.hashing import md5_sha_from_dict

config = app.config  # type: ignore
stats_logger: BaseStatsLogger = config["STATS_LOGGER"]
logger = logging.getLogger(__name__)


def generate_cache_key(values_dict: Dict[str, Any], key_prefix: str = "") -> str:
hash_str = md5_sha_from_dict(values_dict, default=json_int_dttm_ser)
return f"{key_prefix}{hash_str}"
def set_and_log_cache(
cache_instance: Cache,
cache_key: str,
cache_value: Dict[str, Any],
cache_timeout: Optional[int] = None,
datasource_uid: Optional[str] = None,
) -> None:
if isinstance(cache_instance.cache, NullCache):
return
timeout = cache_timeout if cache_timeout else config["CACHE_DEFAULT_TIMEOUT"]
try:
dttm = datetime.utcnow().isoformat().split(".")[0]
value = {**cache_value, "dttm": dttm}
cache_instance.set(cache_key, value, timeout=timeout)
stats_logger.incr("set_cache_key")
if datasource_uid and config["STORE_CACHE_KEYS_IN_METADATA_DB"]:
ck = CacheKey(
cache_key=cache_key,
cache_timeout=cache_timeout,
datasource_uid=datasource_uid,
)
db.session.add(ck)
except Exception as ex: # pylint: disable=broad-except
        # the cache.set call can fail if the backend is down, if the key is
        # too large, or for various other reasons
logger.warning("Could not cache key %s", cache_key)
logger.exception(ex)
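# Usage sketch (illustrative only; the key, payload and datasource_uid below are
# hypothetical). A caller that has just computed a chart payload could cache it
# and, if STORE_CACHE_KEYS_IN_METADATA_DB is enabled, record the key:
#
#     set_and_log_cache(
#         cache_manager.cache,
#         cache_key="superset_results_abc123",
#         cache_value={"data": [{"gender": "girl", "count": 118}]},
#         cache_timeout=600,
#         datasource_uid="1__table",
#     )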
# If a user sets `max_age` to 0, how long should the browser cache the
# resource? Flask-Caching will cache forever, but for the HTTP header we need
# to specify a "far future" date.
ONE_YEAR = 365 * 24 * 60 * 60  # 1 year in seconds


def view_cache_key(*args: Any, **kwargs: Any) -> str:  # pylint: disable=unused-argument
args_hash = hash(frozenset(request.args.items()))
return "view/{}/{}".format(request.path, args_hash)
def memoized_func(
key: Callable[..., str] = view_cache_key, cache: Cache = cache_manager.cache,
) -> Callable[..., Any]:
"""Use this decorator to cache functions that have predefined first arg.
enable_cache is treated as True by default,
except enable_cache = False is passed to the decorated function.
force means whether to force refresh the cache and is treated as False by default,
except force = True is passed to the decorated function.
timeout of cache is set to 600 seconds by default,
except cache_timeout = {timeout in seconds} is passed to the decorated function.
:param key: a callable function that takes function arguments and returns
the caching key.
:param cache: a FlaskCache instance that will store the cache.
"""
def wrap(f: Callable[..., Any]) -> Callable[..., Any]:
def wrapped_f(self: Any, *args: Any, **kwargs: Any) -> Any:
if not kwargs.get("cache", True):
return f(self, *args, **kwargs)
cache_key = key(self, *args, **kwargs)
obj = cache.get(cache_key)
if not kwargs.get("force") and obj is not None:
return obj
obj = f(self, *args, **kwargs)
cache.set(cache_key, obj, timeout=kwargs.get("cache_timeout"))
return obj
return wrapped_f
return wrap
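# Usage sketch (illustrative; `DummyDatasource` and `datasource_cache_key` are
# hypothetical and not part of this module):
#
#     def datasource_cache_key(self, *args, **kwargs) -> str:
#         return f"datasource_metadata/{self.uid}"
#
#     class DummyDatasource:
#         uid = "1__table"
#
#         @memoized_func(key=datasource_cache_key)
#         def external_metadata(self, cache=True, force=False, cache_timeout=None):
#             ...  # expensive lookup, cached under "datasource_metadata/1__table"
#
# Passing cache=False bypasses the cache, force=True recomputes and overwrites
# the cached value, and cache_timeout overrides the cache's default timeout.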
def etag_cache(
cache: Cache = cache_manager.cache,
get_last_modified: Optional[Callable[..., datetime]] = None,
max_age: Optional[Union[int, float]] = None,
raise_for_access: Optional[Callable[..., Any]] = None,
skip: Optional[Callable[..., bool]] = None,
) -> Callable[..., Any]:
"""
A decorator for caching views and handling etag conditional requests.
The decorator adds headers to GET requests that help with caching: Last-
Modified, Expires and ETag. It also handles conditional requests, when the
client send an If-Matches header.
If a cache is set, the decorator will cache GET responses, bypassing the
dataframe serialization. POST requests will still benefit from the
dataframe cache for requests that produce the same SQL.
"""
if max_age is None:
max_age = app.config["CACHE_DEFAULT_TIMEOUT"]
def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
@wraps(f)
def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin:
# Check if the user can access the resource
if raise_for_access:
try:
raise_for_access(*args, **kwargs)
except Exception: # pylint: disable=broad-except
# If there's no access, bypass the cache and let the function
# handle the response.
return f(*args, **kwargs)
            # for POST requests we can't set cache headers, use the response
            # cache, or use conditional requests; this will still use the
            # dataframe cache in `superset/viz.py`, though.
if request.method == "POST" or (skip and skip(*args, **kwargs)):
return f(*args, **kwargs)
response = None
try:
                # build the cache key from the function arguments and any
                # additional GET arguments (e.g. `form_data`).
key_args = list(args)
key_kwargs = kwargs.copy()
key_kwargs.update(request.args)
cache_key = wrapper.make_cache_key( # type: ignore
f, *key_args, **key_kwargs
)
response = cache.get(cache_key)
except Exception: # pylint: disable=broad-except
if app.debug:
raise
logger.exception("Exception possibly due to cache backend.")
# Check if the cache is stale. Default the content_changed_time to now
# if we don't know when it was last modified.
content_changed_time = datetime.utcnow()
if get_last_modified:
content_changed_time = get_last_modified(*args, **kwargs)
if (
response
and response.last_modified
and response.last_modified.timestamp()
< content_changed_time.timestamp()
):
# Bypass the cache if the response is stale
response = None
# if no response was cached, compute it using the wrapped function
if response is None:
response = f(*args, **kwargs)
                # add headers for caching: Last-Modified, Expires and ETag
                # always revalidate the cache if we're checking permissions or
                # if the response was modified
                if get_last_modified or raise_for_access:
                    # `Cache-Control: no-cache` lets the browser store the
                    # response, but it must be revalidated with the server
                    # before each reuse.
                    response.cache_control.no_cache = True
                else:
                    # `Cache-Control: public` allows any cache (browser or
                    # shared) to store the response and reuse it until it
                    # expires.
                    response.cache_control.public = True
response.last_modified = content_changed_time
expiration = max_age or ONE_YEAR # max_age=0 also means far future
response.expires = response.last_modified + timedelta(
seconds=expiration
)
response.add_etag()
# if we have a cache, store the response from the request
try:
cache.set(cache_key, response, timeout=max_age)
except Exception: # pylint: disable=broad-except
if app.debug:
raise
logger.exception("Exception possibly due to cache backend.")
return response.make_conditional(request)
wrapper.uncached = f # type: ignore
wrapper.cache_timeout = max_age # type: ignore
wrapper.make_cache_key = cache._memoize_make_cache_key( # type: ignore # pylint: disable=protected-access
make_name=None, timeout=max_age
)
return wrapper
return decorator
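# Usage sketch (illustrative; the route, view and `latest_change` helper are
# hypothetical, `app` is assumed to be the Flask application object, and the
# view must return a Flask/werkzeug Response so the decorator can set headers):
#
#     from flask import Response
#
#     def latest_change(dashboard_id: int) -> datetime:
#         # e.g. look up MAX(changed_on) for the dashboard in the metadata DB
#         return datetime(2021, 1, 1)
#
#     @app.route("/dashboard/<int:dashboard_id>")
#     @etag_cache(max_age=600, get_last_modified=latest_change)
#     def show_dashboard(dashboard_id: int) -> Response:
#         return Response(f"dashboard {dashboard_id}")
#
# GET responses receive Last-Modified, Expires and ETag headers, are cached for
# up to max_age seconds, and matching conditional requests are answered with
# 304 Not Modified.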