// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.


#ifndef IMPALA_RUNTIME_LIB_CACHE_H
#define IMPALA_RUNTIME_LIB_CACHE_H

#include <string>
#include <boost/scoped_ptr.hpp>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <boost/thread/mutex.hpp>
#include "common/atomic.h"
#include "common/object-pool.h"
#include "common/status.h"

namespace impala {

class RuntimeState;

/// Process-wide cache of dynamically-linked libraries loaded from HDFS.
/// These libraries can either be shared objects, llvm modules or jars. For
/// shared objects, when we load the shared object, we dlopen() it and keep
/// it in our process. For modules, we store the symbols in the module to
/// service symbol lookups. We can't cache the module since it (i.e. the external
/// module) is consumed when it is linked with the query codegen module.
//
/// Locking strategy: We don't want to grab a big lock across all operations since
/// one of the operations is copying a file from HDFS. With one lock that would
/// prevent any UDFs from running on the system. Instead, we have a global lock
/// that is taken when doing the cache lookup, but is not taken during any blocking calls.
/// During the blocking calls, we take the per-lib lock.
//
/// Entry lifetime management: We cannot delete the entry while a query is
/// using the library. When the caller requests a ptr into the library, they
/// are given the entry handle and must decrement the ref count when they
/// are done.
/// Note: Explicitly managing this reference count at the client is error-prone. See the
/// api for accessing a path, GetLocalPath(), that uses the handle's scope to manage the
/// reference count.
//
/// TODO:
/// - refresh libraries
/// - better cached module management
/// - improve the api to be less error-prone (IMPALA-6439)
struct LibCacheEntry;       // Only used through pointers in this header.
class LibCacheEntryHandle;  // Defined at the bottom of this header.

class LibCache {
 public:
  /// Kind of library a cache entry refers to.
  enum LibType {
    TYPE_SO,      // Shared object
    TYPE_IR,      // IR intermediate
    TYPE_JAR,     // Java jar file. We don't care about the contents in the BE.
  };

  /// Returns the process-wide singleton held in 'instance_'.
  static LibCache* instance() { return LibCache::instance_.get(); }

  /// Calls dlclose on all cached handles.
  ~LibCache();

  /// Initializes the libcache. Must be called before any other APIs.
  static Status Init();

  /// Gets the local 'path' used to cache the file stored at the global 'hdfs_lib_file'. If
  /// this file is not already on the local fs, or if the cached entry's last modified
  /// is older than expected mtime, 'exp_mtime', it copies it and caches the result.
  /// An 'exp_mtime' of -1 makes the mtime check a no-op.
  ///
  /// 'handle' must remain in scope while 'path' is used. The reference count to the
  /// underlying cache entry is decremented when 'handle' goes out-of-scope.
  ///
  /// Returns an error if 'hdfs_lib_file' cannot be copied to the local fs or if
  /// exp_mtime differs from the mtime on the file system.
  /// If error is due to refresh, then the entry will be removed from the cache.
  Status GetLocalPath(const std::string& hdfs_lib_file, LibType type, time_t exp_mtime,
      LibCacheEntryHandle* handle, std::string* path);

  /// Returns status.ok() if the symbol exists in 'hdfs_lib_file', non-ok otherwise.
  /// If status.ok() is true, 'mtime' is set to the cache entry's last modified time.
  /// If an mtime is not applicable, for example, if lookup is for a builtin, then
  /// a default mtime of -1 is set.
  /// If 'quiet' is true, the error status for non-Java unfound symbols will not be
  /// logged.
  Status CheckSymbolExists(const std::string& hdfs_lib_file, LibType type,
      const std::string& symbol, bool quiet, time_t* mtime);

  /// Returns a pointer to the function for the given library and symbol.
  /// If 'hdfs_lib_file' is empty, the symbol is looked up in the impalad process.
  /// Otherwise, 'hdfs_lib_file' should be the HDFS path to a shared library (.so) file.
  /// dlopen handles and symbols are cached.
  /// Only usable if 'hdfs_lib_file' refers to a shared object.
  ///
  /// If entry is non-null and *entry is null, *entry will be set to the cached entry. If
  /// entry is non-null and *entry is non-null, *entry will be reused (i.e., the use count
  /// is not increased). The caller must call DecrementUseCount(*entry) when it is done
  /// using fn_ptr and it is no longer valid to use fn_ptr.
  ///
  /// If 'quiet' is true, returned error statuses will not be logged.
  /// If the entry is already cached, if its last modified time is older than
  /// expected mtime, 'exp_mtime', the entry is refreshed.
  /// An 'exp_mtime' of -1 makes the mtime check a no-op.
  /// An error is returned if exp_mtime differs from the mtime on the file system.
  /// If error is due to refresh, then the entry will be removed from the cache.
  /// TODO: api is error-prone. upgrade to LibCacheEntryHandle (see IMPALA-6439).
  Status GetSoFunctionPtr(const std::string& hdfs_lib_file, const std::string& symbol,
      time_t exp_mtime, void** fn_ptr, LibCacheEntry** entry, bool quiet = false);

  /// Marks the entry for 'hdfs_lib_file' as needing to be refreshed if the file in HDFS is
  /// newer than the locally cached copy. The refresh will occur the next time the entry is
  /// accessed.
  void SetNeedsRefresh(const std::string& hdfs_lib_file);

  /// Releases one use of 'entry'. See comment in GetSoFunctionPtr().
  void DecrementUseCount(LibCacheEntry* entry);

  /// Removes the cache entry for 'hdfs_lib_file'
  void RemoveEntry(const std::string& hdfs_lib_file);

  /// Removes all cached entries.
  void DropCache();

 private:
  /// Singleton instance. Instantiated in Init().
  static boost::scoped_ptr<LibCache> instance_;

  /// dlopen() handle for the current process (i.e. impalad).
  void* current_process_handle_;

  /// The number of libs that have been copied from HDFS to the local FS.
  /// This is appended to the local fs path to remove collisions.
  AtomicInt64 num_libs_copied_;

  /// Protects lib_cache_. For lock ordering, this lock must always be taken before
  /// the per entry lock.
  boost::mutex lock_;

  /// Maps HDFS library path => cache entry.
  /// Entries in the cache need to be explicitly deleted.
  using LibMap = boost::unordered_map<std::string, LibCacheEntry*>;
  LibMap lib_cache_;

  /// Private: the singleton is only meant to be created through Init().
  LibCache();
  LibCache(LibCache const& l); // disable copy ctor
  LibCache& operator=(LibCache const& l); // disable assignment

  Status InitInternal();

  /// Returns the cache entry for 'hdfs_lib_file'. If this library has not been
  /// copied locally, it will copy it and add a new LibCacheEntry to 'lib_cache_'.
  /// If the entry is already cached, if its last modified time is older than
  /// expected mtime, 'exp_mtime', the entry is refreshed. Result is returned in *entry.
  /// An 'exp_mtime' of -1 makes the mtime check a no-op.
  /// An error is returned if exp_mtime differs from the mtime on the file system.
  /// No locks should be taken before calling this. On return the entry's lock is
  /// taken and returned in *entry_lock.
  /// If an error is returned, there will be no entry in lib_cache_ and *entry is NULL.
  Status GetCacheEntry(const std::string& hdfs_lib_file, LibType type, time_t exp_mtime,
      boost::unique_lock<boost::mutex>* entry_lock, LibCacheEntry** entry);

  /// Implementation to get the cache entry for 'hdfs_lib_file'. Errors are returned
  /// without evicting the cache entry if the status is not OK and *entry is not NULL.
  Status GetCacheEntryInternal(const std::string& hdfs_lib_file, LibType type,
      time_t exp_mtime, boost::unique_lock<boost::mutex>* entry_lock,
      LibCacheEntry** entry);

  /// Returns iter's cache entry in 'entry' with 'entry_lock' held if entry does not
  /// need to be refreshed.
  /// If entry needs to be refreshed, then it is removed and '*entry' is set to nullptr.
  /// The entry is refreshed if needs_refresh is set and its mtime is
  /// older than the file on the fs OR its mtime is older than the
  /// 'exp_mtime' argument.
  /// An 'exp_mtime' of -1 makes the mtime check a no-op.
  /// An error is returned if exp_mtime differs from the mtime on the file system.
  /// If an error occurs when refreshing the entry, the entry is removed.
  /// The cache lock must be held prior to calling this method. On return the entry's
  /// lock is taken and returned in '*entry_lock' if entry does not need to be refreshed.
  /// TODO: cleanup this method's interface and how the outputs are used.
  Status RefreshCacheEntry(const std::string& hdfs_lib_file, LibType type,
      time_t exp_mtime, const LibMap::iterator& iter,
      boost::unique_lock<boost::mutex>* entry_lock, LibCacheEntry** entry);

  /// 'hdfs_lib_file' is copied locally and 'entry' is initialized with its contents.
  /// An error is returned if exp_mtime differs from the mtime on the file system.
  /// An 'exp_mtime' of -1 makes the mtime check a no-op.
  /// No locks are assumed held; 'entry' should be visible only to a single thread.
  Status LoadCacheEntry(const std::string& hdfs_lib_file, time_t exp_mtime, LibType type,
      LibCacheEntry* entry);

  /// Utility function for generating a filename unique to this process and
  /// 'hdfs_path'. This is to prevent multiple impalad processes or different library files
  /// with the same name from clobbering each other. 'hdfs_path' should be the full path
  /// (including the filename) of the file we're going to copy to the local FS, and
  /// 'local_dir' is the local directory prefix of the returned path.
  std::string MakeLocalPath(const std::string& hdfs_path, const std::string& local_dir);

  /// Implementation to remove an entry from the cache.
  /// lock_ must be held. The entry's lock should not be held.
  void RemoveEntryInternal(
      const std::string& hdfs_lib_file, const LibMap::iterator& entry_iterator);
};

/// Handle for a LibCacheEntry that decrements its reference count when the handle is
/// destroyed or re-used for another entry.
/// Only LibCache (a friend) can read or set the wrapped entry; the handle is
/// non-copyable.
class LibCacheEntryHandle {
 public:
  /// Creates an empty handle that refers to no entry.
  LibCacheEntryHandle() {}
  ~LibCacheEntryHandle();

 private:
  friend class LibCache;

  /// Returns the entry currently held, or nullptr if the handle is empty.
  LibCacheEntry* entry() const { return entry_; }

  /// Points the handle at 'entry'. If the handle already holds an entry, the use count
  /// of that previously held entry is decremented first.
  void SetEntry(LibCacheEntry* entry) {
    // Release the entry we are letting go of ('entry_'), not the incoming 'entry':
    // decrementing the new entry would leak the old reference and drop one from the
    // entry we are about to hold.
    if (entry_ != nullptr) LibCache::instance()->DecrementUseCount(entry_);
    entry_ = entry;
  }

  /// Entry referenced by this handle; nullptr when empty.
  LibCacheEntry* entry_ = nullptr;

  DISALLOW_COPY_AND_ASSIGN(LibCacheEntryHandle);
};

}

#endif
