thirdparty/rocksdb/include/rocksdb/options.h - nifi-minifi-cpp - Git at Google

 // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 //  This source code is licensed under both the GPLv2 (found in the
 //  COPYING file in the root directory) and Apache 2.0 License
 //  (found in the LICENSE.Apache file in the root directory).
 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.

 #ifndef STORAGE_ROCKSDB_INCLUDE_OPTIONS_H_
 #define STORAGE_ROCKSDB_INCLUDE_OPTIONS_H_

 #include <stddef.h>
 #include <stdint.h>
 #include <string>
 #include <memory>
 #include <vector>
 #include <limits>
 #include <unordered_map>

 #include "rocksdb/advanced_options.h"
 #include "rocksdb/comparator.h"
 #include "rocksdb/env.h"
 #include "rocksdb/listener.h"
 #include "rocksdb/universal_compaction.h"
 #include "rocksdb/version.h"
 #include "rocksdb/write_buffer_manager.h"

 #ifdef max
 #undef max
 #endif

 namespace rocksdb {

 class Cache;
 class CompactionFilter;
 class CompactionFilterFactory;
 class Comparator;
 class Env;
 enum InfoLogLevel : unsigned char;
 class SstFileManager;
 class FilterPolicy;
 class Logger;
 class MergeOperator;
 class Snapshot;
 class MemTableRepFactory;
 class RateLimiter;
 class Slice;
 class Statistics;
 class InternalKeyComparator;
 class WalFilter;

 // The DB stores its contents in a set of blocks, each holding a sequence
 // of key,value pairs. A block may be compressed before it is written to a
 // file. This enum names the compression method (if any) applied to a block.
 enum CompressionType : unsigned char {
   // NOTE: the values of existing entries are part of the persistent
   // on-disk format, so do not change them.
   kNoCompression = 0x00,
   kSnappyCompression = 0x01,
   kZlibCompression = 0x02,
   kBZip2Compression = 0x03,
   kLZ4Compression = 0x04,
   kLZ4HCCompression = 0x05,
   kXpressCompression = 0x06,
   kZSTD = 0x07,

   // Use kZSTDNotFinalCompression only if you have to use a ZSTD lib older
   // than 0.8.0, or if you consider it possible that the service will be
   // downgraded or the database files copied to another service running an
   // older version of RocksDB that doesn't have kZSTD. Otherwise, use kZSTD.
   // This option will eventually be removed from the public API.
   kZSTDNotFinalCompression = 0x40,

   // kDisableCompressionOption is used to disable some compression options.
   kDisableCompressionOption = 0xFF,
 };

 struct Options;

 struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
   // Restores the options to those of a previous RocksDB version. Only
   // versions 4.6 and later are supported.
   ColumnFamilyOptions* OldDefaults(int rocksdb_major_version = 4,
                                    int rocksdb_minor_version = 6);

   // Convenience functions that make it easier to optimize RocksDB.

   // Intended for a very small DB (e.g. under 1GB) when you do not want to
   // spend lots of memory on memtables.
   ColumnFamilyOptions* OptimizeForSmallDb();

   // Intended for data that does not need to be kept sorted, i.e. you will
   // never use an iterator, only Put() and Get() API calls.
   //
   // Not supported in ROCKSDB_LITE
   ColumnFamilyOptions* OptimizeForPointLookup(uint64_t block_cache_size_mb);

   // The default values of some ColumnFamilyOptions parameters are not tuned
   // for heavy workloads and big datasets, so write stalls may be observed
   // under some conditions. As a starting point for tuning RocksDB options,
   // use the following two functions:
   // * OptimizeLevelStyleCompaction -- optimizes level style compaction
   // * OptimizeUniversalStyleCompaction -- optimizes universal style compaction
   // Universal style compaction focuses on reducing Write Amplification
   // Factor for big data sets, but increases Space Amplification. More about
   // the different styles can be found here:
   // https://github.com/facebook/rocksdb/wiki/Rocksdb-Architecture-Guide
   // Also make sure to call IncreaseParallelism(), which will provide the
   // biggest performance gains.
   // Note: more memory than memtable_memory_budget might be used during
   // periods of high write rate.
   //
   // OptimizeUniversalStyleCompaction is not supported in ROCKSDB_LITE
   ColumnFamilyOptions* OptimizeLevelStyleCompaction(
       uint64_t memtable_memory_budget = 512 * 1024 * 1024);
   ColumnFamilyOptions* OptimizeUniversalStyleCompaction(
       uint64_t memtable_memory_budget = 512 * 1024 * 1024);

   // -------------------
   // Parameters that affect behavior

   // Comparator that defines the order of keys in the table.
   // Default: a comparator that uses lexicographic byte-wise ordering
   //
   // REQUIRES: The client must ensure that the comparator supplied here has
   // the same name and orders keys *exactly* the same as the comparator
   // provided to previous open calls on the same DB.
   const Comparator* comparator = BytewiseComparator();

   // REQUIRES: The client must provide a merge operator if the Merge
   // operation needs to be accessed. Calling Merge on a DB without a merge
   // operator would result in Status::NotSupported. The client must ensure
   // that the merge operator supplied here has the same name and *exactly*
   // the same semantics as the merge operator provided to previous open
   // calls on the same DB. The only exception is reserved for upgrade, where
   // a DB previously without a merge operator is introduced to the Merge
   // operation for the first time. In that case a merge operator must be
   // specified when opening the DB.
   // Default: nullptr
   std::shared_ptr<MergeOperator> merge_operator = nullptr;

   // A single CompactionFilter instance to call into during compaction.
   // It allows an application to modify/delete a key-value during background
   // compaction.
   //
   // If the client requires a new compaction filter to be used for different
   // compaction runs, it can specify compaction_filter_factory instead of
   // this option. The client should specify only one of the two; if it
   // specifies both, compaction_filter takes precedence over
   // compaction_filter_factory.
   //
   // If multithreaded compaction is being used, the supplied CompactionFilter
   // instance may be used from different threads concurrently and so should
   // be thread-safe.
   //
   // Default: nullptr
   const CompactionFilter* compaction_filter = nullptr;

   // A factory that provides compaction filter objects, which allow an
   // application to modify/delete a key-value during background compaction.
   //
   // A new filter will be created on each compaction run. If multithreaded
   // compaction is being used, each created CompactionFilter will only be
   // used from a single thread and so does not need to be thread-safe.
   //
   // Default: nullptr
   std::shared_ptr<CompactionFilterFactory> compaction_filter_factory = nullptr;

   // -------------------
   // Parameters that affect performance

   // Amount of data to build up in memory (backed by an unsorted log on
   // disk) before converting to a sorted on-disk file.
   //
   // Larger values increase performance, especially during bulk loads. Up to
   // max_write_buffer_number write buffers may be held in memory at the same
   // time, so you may wish to adjust this parameter to control memory usage.
   // Also, a larger write buffer will result in a longer recovery time the
   // next time the database is opened.
   //
   // Note that write_buffer_size is enforced per column family.
   // See db_write_buffer_size for sharing memory across column families.
   //
   // Default: 64MB
   //
   // Dynamically changeable through SetOptions() API
   size_t write_buffer_size = 64 * 1024 * 1024;

   // Compress blocks using the specified compression algorithm. This
   // parameter can be changed dynamically.
   //
   // Default: kSnappyCompression, if it's supported. If snappy is not linked
   // with the library, the default is kNoCompression.
   //
   // Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz:
   //    ~200-500MB/s compression
   //    ~400-800MB/s decompression
   // These speeds are significantly faster than most persistent storage
   // speeds, so switching to kNoCompression is typically never worthwhile.
   // Even if the input data is incompressible, the kSnappyCompression
   // implementation will efficiently detect that and will switch to
   // uncompressed mode.
   CompressionType compression;

   // Compression algorithm that will be used for the bottommost level that
   // contains files. If level-compaction is used, this option will only
   // affect levels after the base level.
   //
   // Default: kDisableCompressionOption (Disabled)
   CompressionType bottommost_compression = kDisableCompressionOption;

   // Different options for compression algorithms
   CompressionOptions compression_opts;

   // Number of files that triggers level-0 compaction. A value <0 means that
   // level-0 compaction will not be triggered by the number of files at all.
   //
   // Default: 4
   //
   // Dynamically changeable through SetOptions() API
   int level0_file_num_compaction_trigger = 4;

   // If non-nullptr, use the specified function to determine the prefixes
   // for keys. These prefixes will be placed in the filter. Depending on the
   // workload, this can reduce the read-IOP cost of scans when a prefix is
   // passed via ReadOptions to db.NewIterator(). For prefix filtering to
   // work properly, "prefix_extractor" and "comparator" must be such that
   // the following properties hold:
   //
   // 1) key.starts_with(prefix(key))
   // 2) Compare(prefix(key), key) <= 0.
   // 3) If Compare(k1, k2) <= 0, then Compare(prefix(k1), prefix(k2)) <= 0
   // 4) prefix(prefix(key)) == prefix(key)
   //
   // Default: nullptr
   std::shared_ptr<const SliceTransform> prefix_extractor = nullptr;

   // Controls the maximum total data size for a level.
   // max_bytes_for_level_base is the max total for level-1.
   // The maximum number of bytes for level L can be calculated as
   // (max_bytes_for_level_base) * (max_bytes_for_level_multiplier ^ (L-1))
   // For example, if max_bytes_for_level_base is 200MB and
   // max_bytes_for_level_multiplier is 10, the total data size for level-1
   // will be 200MB, the total file size for level-2 will be 2GB, and the
   // total file size for level-3 will be 20GB.
   //
   // Default: 256MB.
   //
   // Dynamically changeable through SetOptions() API
   uint64_t max_bytes_for_level_base = 256 * 1024 * 1024;

   // Disables automatic compactions. Manual compactions can still be issued
   // on this column family.
   //
   // Dynamically changeable through SetOptions() API
   bool disable_auto_compactions = false;

   // A factory that provides TableFactory objects.
   // Default: a block-based table factory that provides a default
   // implementation of TableBuilder and TableReader with default
   // BlockBasedTableOptions.
   std::shared_ptr<TableFactory> table_factory;

   // Creates ColumnFamilyOptions with default values for all fields
   ColumnFamilyOptions();
   // Creates ColumnFamilyOptions from Options
   explicit ColumnFamilyOptions(const Options& options);

   void Dump(Logger* log) const;
 };

 enum class WALRecoveryMode : char {
   // Original LevelDB recovery.
   // Incomplete records in the trailing data of all logs are tolerated.
   // Use case: this is the legacy behavior (default).
   kTolerateCorruptedTailRecords = 0,

   // Recovery from a clean shutdown.
   // No corruption is expected to be found in the WAL.
   // Use case: ideal for unit tests and rare applications that can require
   // a high consistency guarantee.
   kAbsoluteConsistency = 1,

   // Recovery to point-in-time consistency.
   // WAL playback stops on discovering a WAL inconsistency.
   // Use case: ideal for systems that have a disk controller cache, like a
   // hard disk or an SSD without super capacitor, that store related data.
   kPointInTimeRecovery = 2,

   // Recovery after a disaster.
   // Any corruption in the WAL is ignored and as much data as possible is
   // salvaged.
   // Use case: ideal for a last ditch effort to recover data, or for systems
   // that operate with low grade unrelated data.
   kSkipAnyCorruptedRecords = 3,
 };

 // One entry of a path list: a directory together with the target size of
 // the files kept under it.
 struct DbPath {
   std::string path;
   // Target size of total files under the path, in bytes.
   uint64_t target_size = 0;

   // Creates an entry with an empty path and a target size of 0.
   DbPath() {}
   // Creates an entry for path `p` with a target size of `t` bytes.
   DbPath(const std::string& p, uint64_t t) : path(p), target_size(t) {}
 };


 struct DBOptions {
   // The function recovers options to the options as in version 4.6.
   DBOptions* OldDefaults(int rocksdb_major_version = 4,
                          int rocksdb_minor_version = 6);

   // Some functions that make it easier to optimize RocksDB

   // Use this if your DB is very small (like under 1GB) and you don't want to
   // spend lots of memory for memtables.
   DBOptions* OptimizeForSmallDb();

 #ifndef ROCKSDB_LITE
   // By default, RocksDB uses only one background thread for flush and
   // compaction. Calling this function will set it up such that total of
   // `total_threads` is used. Good value for `total_threads` is the number of
   // cores. You almost definitely want to call this function if your system is
   // bottlenecked by RocksDB.
   DBOptions* IncreaseParallelism(int total_threads = 16);
 #endif  // ROCKSDB_LITE

   // If true, the database will be created if it is missing.
   // Default: false
   bool create_if_missing = false;

   // If true, missing column families will be automatically created.
   // Default: false
   bool create_missing_column_families = false;

   // If true, an error is raised if the database already exists.
   // Default: false
   bool error_if_exists = false;

   // If true, RocksDB will aggressively check consistency of the data.
   // Also, if any of the  writes to the database fails (Put, Delete, Merge,
   // Write), the database will switch to read-only mode and fail all other
   // Write operations.
   // In most cases you want this to be set to true.
   // Default: true
   bool paranoid_checks = true;

   // Use the specified object to interact with the environment,
   // e.g. to read/write files, schedule background work, etc.
   // Default: Env::Default()
   Env* env = Env::Default();

   // Use to control write rate of flush and compaction. Flush has higher
   // priority than compaction. Rate limiting is disabled if nullptr.
   // If rate limiter is enabled, bytes_per_sync is set to 1MB by default.
   // Default: nullptr
   std::shared_ptr<RateLimiter> rate_limiter = nullptr;

   // Use to track SST files and control their file deletion rate.
   //
   // Features:
   //  - Throttle the deletion rate of the SST files.
   //  - Keep track of the total size of all SST files.
   //  - Set a maximum allowed space limit for SST files that when reached
   //    the DB won't do any further flushes or compactions and will set the
   //    background error.
   //  - Can be shared between multiple dbs.
   // Limitations:
   //  - Only track and throttle deletes of SST files in
   //    first db_path (db_name if db_paths is empty).
   //
   // Default: nullptr
   std::shared_ptr<SstFileManager> sst_file_manager = nullptr;

   // Any internal progress/error information generated by the db will
   // be written to info_log if it is non-nullptr, or to a file stored
   // in the same directory as the DB contents if info_log is nullptr.
   // Default: nullptr
   std::shared_ptr<Logger> info_log = nullptr;

 #ifdef NDEBUG
       InfoLogLevel info_log_level = INFO_LEVEL;
 #else
       InfoLogLevel info_log_level = DEBUG_LEVEL;
 #endif  // NDEBUG

   // Number of open files that can be used by the DB.  You may need to
   // increase this if your database has a large working set. Value -1 means
   // files opened are always kept open. You can estimate number of files based
   // on target_file_size_base and target_file_size_multiplier for level-based
   // compaction. For universal-style compaction, you can usually set it to -1.
   // Default: -1
   int max_open_files = -1;

   // If max_open_files is -1, DB will open all files on DB::Open(). You can
   // use this option to increase the number of threads used to open the files.
   // Default: 16
   int max_file_opening_threads = 16;

   // Once write-ahead logs exceed this size, we will start forcing the flush of
   // column families whose memtables are backed by the oldest live WAL file
   // (i.e. the ones that are causing all the space amplification). If set to 0
   // (default), we will dynamically choose the WAL size limit to be
   // [sum of all write_buffer_size * max_write_buffer_number] * 4
   // Default: 0
   uint64_t max_total_wal_size = 0;

   // If non-null, then we should collect metrics about database operations
   std::shared_ptr<Statistics> statistics = nullptr;

   // If true, then every store to stable storage will issue a fsync.
   // If false, then every store to stable storage will issue a fdatasync.
   // This parameter should be set to true while storing data to
   // filesystem like ext3 that can lose files after a reboot.
   // Default: false
   // Note: on many platforms fdatasync is defined as fsync, so this parameter
   // would make no difference. Refer to fdatasync definition in this code base.
   bool use_fsync = false;

   // A list of paths where SST files can be put into, with its target size.
   // Newer data is placed into paths specified earlier in the vector while
   // older data gradually moves to paths specified later in the vector.
   //
   // For example, you have a flash device with 10GB allocated for the DB,
   // as well as a hard drive of 2TB, you should config it to be:
   //   [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
   //
   // The system will try to guarantee data under each path is close to but
   // not larger than the target size. But current and future file sizes used
   // by determining where to place a file are based on best-effort estimation,
   // which means there is a chance that the actual size under the directory
   // is slightly more than target size under some workloads. User should give
   // some buffer room for those cases.
   //
   // If none of the paths has sufficient room to place a file, the file will
   // be placed in the last path anyway, regardless of the target size.
   //
   // Placing newer data to earlier paths is also best-efforts. User should
   // expect user files to be placed in higher levels in some extreme cases.
   //
   // If left empty, only one path will be used, which is db_name passed when
   // opening the DB.
   // Default: empty
   std::vector<DbPath> db_paths;

   // This specifies the info LOG dir.
   // If it is empty, the log files will be in the same dir as data.
   // If it is non empty, the log files will be in the specified dir,
   // and the db data dir's absolute path will be used as the log file
   // name's prefix.
   std::string db_log_dir = "";

   // This specifies the absolute dir path for write-ahead logs (WAL).
   // If it is empty, the log files will be in the same dir as data,
   //   dbname is used as the data dir by default
   // If it is non empty, the log files will be kept in the specified dir.
   // When destroying the db,
   //   all log files in wal_dir and the dir itself are deleted
   std::string wal_dir = "";

   // The periodicity when obsolete files get deleted. The default
   // value is 6 hours. The files that get out of scope by the compaction
   // process will still get automatically deleted on every compaction,
   // regardless of this setting
   uint64_t delete_obsolete_files_period_micros = 6ULL * 60 * 60 * 1000000;

   // Maximum number of concurrent background jobs (compactions and flushes).
   int max_background_jobs = 2;

   // NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   // value of max_background_jobs. This option is ignored.
   int base_background_compactions = -1;

   // NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   // value of max_background_jobs. For backwards compatibility we will set
   // `max_background_jobs = max_background_compactions + max_background_flushes`
   // in the case where user sets at least one of `max_background_compactions` or
   // `max_background_flushes` (we replace -1 by 1 in case one option is unset).
   //
   // Maximum number of concurrent background compaction jobs, submitted to
   // the default LOW priority thread pool.
   //
   // If you're increasing this, also consider increasing number of threads in
   // LOW priority thread pool. For more information, see
   // Env::SetBackgroundThreads
   // Default: -1
   int max_background_compactions = -1;

   // This value represents the maximum number of threads that will
   // concurrently perform a compaction job by breaking it into multiple,
   // smaller ones that are run simultaneously.
   // Default: 1 (i.e. no subcompactions)
   uint32_t max_subcompactions = 1;

   // NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   // value of max_background_jobs. For backwards compatibility we will set
   // `max_background_jobs = max_background_compactions + max_background_flushes`
   // in the case where user sets at least one of `max_background_compactions` or
   // `max_background_flushes`.
   //
   // Maximum number of concurrent background memtable flush jobs, submitted by
   // default to the HIGH priority thread pool. If the HIGH priority thread pool
   // is configured to have zero threads, flush jobs will share the LOW priority
   // thread pool with compaction jobs.
   //
   // It is important to use both thread pools when the same Env is shared by
   // multiple db instances. Without a separate pool, long running compaction
   // jobs could potentially block memtable flush jobs of other db instances,
   // leading to unnecessary Put stalls.
   //
   // If you're increasing this, also consider increasing number of threads in
   // HIGH priority thread pool. For more information, see
   // Env::SetBackgroundThreads
   // Default: -1
   int max_background_flushes = -1;

   // Specify the maximal size of the info log file. If the log file
   // is larger than `max_log_file_size`, a new info log file will
   // be created.
   // If max_log_file_size == 0, all logs will be written to one
   // log file.
   size_t max_log_file_size = 0;

   // Time for the info log file to roll (in seconds).
   // If specified with non-zero value, log file will be rolled
   // if it has been active longer than `log_file_time_to_roll`.
   // Default: 0 (disabled)
   // Not supported in ROCKSDB_LITE mode!
   size_t log_file_time_to_roll = 0;

   // Maximal info log files to be kept.
   // Default: 1000
   size_t keep_log_file_num = 1000;

   // Recycle log files.
   // If non-zero, we will reuse previously written log files for new
   // logs, overwriting the old data.  The value indicates how many
   // such files we will keep around at any point in time for later
   // use.  This is more efficient because the blocks are already
   // allocated and fdatasync does not need to update the inode after
   // each write.
   // Default: 0
   size_t recycle_log_file_num = 0;

   // manifest file is rolled over on reaching this limit.
   // The older manifest file will be deleted.
   // The default value is the maximum uint64_t value so that roll-over does
   // not take place.
   uint64_t max_manifest_file_size = std::numeric_limits<uint64_t>::max();

   // Number of shards used for table cache.
   int table_cache_numshardbits = 6;

   // NOT SUPPORTED ANYMORE
   // int table_cache_remove_scan_count_limit;

   // The following two fields affect how archived logs will be deleted.
   // 1. If both set to 0, logs will be deleted asap and will not get into
   //    the archive.
   // 2. If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
   //    WAL files will be checked every 10 min and if total size is greater
   //    than WAL_size_limit_MB, they will be deleted starting with the
   //    earliest until size_limit is met. All empty files will be deleted.
   // 3. If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
   //    WAL files will be checked every WAL_ttl_seconds / 2 and those that
   //    are older than WAL_ttl_seconds will be deleted.
   // 4. If both are not 0, WAL files will be checked every 10 min and both
   //    checks will be performed with ttl being first.
   uint64_t WAL_ttl_seconds = 0;
   uint64_t WAL_size_limit_MB = 0;

   // Number of bytes to preallocate (via fallocate) the manifest
   // files.  Default is 4mb, which is reasonable to reduce random IO
   // as well as prevent overallocation for mounts that preallocate
   // large amounts of data (such as xfs's allocsize option).
   size_t manifest_preallocation_size = 4 * 1024 * 1024;

   // Allow the OS to mmap file for reading sst tables. Default: false
   bool allow_mmap_reads = false;

   // Allow the OS to mmap file for writing.
   // DB::SyncWAL() only works if this is set to false.
   // Default: false
   bool allow_mmap_writes = false;

   // Enable direct I/O mode for read/write
   // they may or may not improve performance depending on the use case
   //
   // Files will be opened in "direct I/O" mode
   // which means that data r/w from the disk will not be cached or
   // buffered. The hardware buffer of the devices may however still
   // be used. Memory mapped files are not impacted by these parameters.

   // Use O_DIRECT for user reads
   // Default: false
   // Not supported in ROCKSDB_LITE mode!
   bool use_direct_reads = false;

   // Use O_DIRECT for both reads and writes in background flush and compactions
   // When true, we also force new_table_reader_for_compaction_inputs to true.
   // Default: false
   // Not supported in ROCKSDB_LITE mode!
   bool use_direct_io_for_flush_and_compaction = false;

   // If false, fallocate() calls are bypassed
   bool allow_fallocate = true;

   // Disable child process inherit open files. Default: true
   bool is_fd_close_on_exec = true;

   // NOT SUPPORTED ANYMORE -- this option is no longer used
   bool skip_log_error_on_recovery = false;

   // if not zero, dump rocksdb.stats to LOG every stats_dump_period_sec
   // Default: 600 (10 min)
   unsigned int stats_dump_period_sec = 600;

   // If set true, will hint the underlying file system that the file
   // access pattern is random, when a sst file is opened.
   // Default: true
   bool advise_random_on_open = true;

   // Amount of data to build up in memtables across all column
   // families before writing to disk.
   //
   // This is distinct from write_buffer_size, which enforces a limit
   // for a single memtable.
   //
   // This feature is disabled by default. Specify a non-zero value
   // to enable it.
   //
   // Default: 0 (disabled)
   size_t db_write_buffer_size = 0;

   // The memory usage of memtable will report to this object. The same object
   // can be passed into multiple DBs and it will track the sum of size of all
   // the DBs. If the total size of all live memtables of all the DBs exceeds
   // a limit, a flush will be triggered in the next DB to which the next write
   // is issued.
   //
   // If the object is only passed to one DB, the behavior is the same as
   // db_write_buffer_size. When write_buffer_manager is set, the value set will
   // override db_write_buffer_size.
   //
   // This feature is disabled by default. Specify a non-zero value
   // to enable it.
   //
   // Default: null
   std::shared_ptr<WriteBufferManager> write_buffer_manager = nullptr;

   // Specify the file access pattern once a compaction is started.
   // It will be applied to all input files of a compaction.
   // Default: NORMAL
   enum AccessHint {
       NONE,
       NORMAL,
       SEQUENTIAL,
       WILLNEED
   };
   AccessHint access_hint_on_compaction_start = NORMAL;

   // If true, always create a new file descriptor and new table reader
   // for compaction inputs. Turn this parameter on may introduce extra
   // memory usage in the table reader, if it allocates extra memory
   // for indexes. This will allow file descriptor prefetch options
   // to be set for compaction input files and not to impact file
   // descriptors for the same file used by user queries.
   // Suggest to enable BlockBasedTableOptions.cache_index_and_filter_blocks
   // for this mode if using block-based table.
   //
   // Default: false
   bool new_table_reader_for_compaction_inputs = false;

   // If non-zero, we perform bigger reads when doing compaction. If you're
   // running RocksDB on spinning disks, you should set this to at least 2MB.
   // That way RocksDB's compaction is doing sequential instead of random reads.
   //
   // When non-zero, we also force new_table_reader_for_compaction_inputs to
   // true.
   //
   // Default: 0
   size_t compaction_readahead_size = 0;

   // This is a maximum buffer size that is used by WinMmapReadableFile in
   // unbuffered disk I/O mode. We need to maintain an aligned buffer for
   // reads. We allow the buffer to grow until the specified value and then
   // for bigger requests allocate one shot buffers. In unbuffered mode we
   // always bypass read-ahead buffer at ReadaheadRandomAccessFile
   // When read-ahead is required we then make use of compaction_readahead_size
   // value and always try to read ahead. With read-ahead we always
   // pre-allocate buffer to the size instead of growing it up to a limit.
   //
   // This option is currently honored only on Windows
   //
   // Default: 1 Mb
   //
   // Special value: 0 - means do not maintain per instance buffer. Allocate
   //                per request buffer and avoid locking.
   size_t random_access_max_buffer_size = 1024 * 1024;

   // This is the maximum buffer size that is used by WritableFileWriter.
   // On Windows, we need to maintain an aligned buffer for writes.
   // We allow the buffer to grow until its size hits the limit in buffered
   // IO and fix the buffer size when using direct IO to ensure alignment of
   // write requests if the logical sector size is unusual
   //
   // Default: 1024 * 1024 (1 MB)
   size_t writable_file_max_buffer_size = 1024 * 1024;


   // Use adaptive mutex, which spins in the user space before resorting
   // to kernel. This could reduce context switch when the mutex is not
   // heavily contended. However, if the mutex is hot, we could end up
   // wasting spin time.
   // Default: false
   bool use_adaptive_mutex = false;

   // Create DBOptions with default values for all fields
   DBOptions();
   // Create DBOptions from Options
   explicit DBOptions(const Options& options);

   void Dump(Logger* log) const;

   // Allows OS to incrementally sync files to disk while they are being
   // written, asynchronously, in the background. This operation can be used
   // to smooth out write I/Os over time. Users shouldn't rely on it for
   // persistency guarantee.
   // Issue one request for every bytes_per_sync written. 0 turns it off.
   // Default: 0
   //
   // You may consider using rate_limiter to regulate write rate to device.
   // When rate limiter is enabled, it automatically enables bytes_per_sync
   // to 1MB.
   //
   // This option applies to table files
   uint64_t bytes_per_sync = 0;

   // Same as bytes_per_sync, but applies to WAL files
   // Default: 0, turned off
   uint64_t wal_bytes_per_sync = 0;

   // A vector of EventListeners whose callback functions will be called
   // when a specific RocksDB event happens.
   std::vector<std::shared_ptr<EventListener>> listeners;

   // If true, then the status of the threads involved in this DB will
   // be tracked and available via GetThreadList() API.
   //
   // Default: false
   bool enable_thread_tracking = false;

   // The limited write rate to DB if soft_pending_compaction_bytes_limit or
   // level0_slowdown_writes_trigger is triggered, or we are writing to the
   // last mem table allowed and we allow more than 3 mem tables. It is
   // calculated using size of user write requests before compression.
   // RocksDB may decide to slow down more if the compaction still
   // gets behind further.
   // If the value is 0, we will infer a value from `rate_limiter` value
   // if it is not empty, or 16MB if `rate_limiter` is empty. Note that
   // if users change the rate in `rate_limiter` after DB is opened,
   // `delayed_write_rate` won't be adjusted.
   //
   // Unit: byte per second.
   //
   // Default: 0
   uint64_t delayed_write_rate = 0;

   // By default, a single write thread queue is maintained. The thread that
   // gets to the head of the queue becomes the write batch group leader and is
   // responsible for writing to WAL and memtable for the batch group.
   //
   // If enable_pipelined_write is true, separate write thread queues are
   // maintained for WAL write and memtable write. A write thread first enters
   // the WAL writer queue and then the memtable writer queue. A pending thread
   // on the WAL writer queue thus only has to wait for previous writers to
   // finish their WAL writing but not the memtable writing. Enabling the
   // feature may improve write throughput and reduce latency of the prepare
   // phase of two-phase commit.
   //
   // Default: false
   bool enable_pipelined_write = false;

   // If true, allow multi-writers to update mem tables in parallel.
   // Only some memtable_factory-s support concurrent writes; currently it
   // is implemented only for SkipListFactory.  Concurrent memtable writes
   // are not compatible with inplace_update_support or filter_deletes.
   // It is strongly recommended to set enable_write_thread_adaptive_yield
   // if you are going to use this feature.
   //
   // Default: true
   bool allow_concurrent_memtable_write = true;

   // If true, threads synchronizing with the write batch group leader will
   // wait for up to write_thread_max_yield_usec before blocking on a mutex.
   // This can substantially improve throughput for concurrent workloads,
   // regardless of whether allow_concurrent_memtable_write is enabled.
   //
   // Default: true
   bool enable_write_thread_adaptive_yield = true;

   // The maximum number of microseconds that a write operation will use
   // a yielding spin loop to coordinate with other write threads before
   // blocking on a mutex.  (Assuming write_thread_slow_yield_usec is
   // set properly) increasing this value is likely to increase RocksDB
   // throughput at the expense of increased CPU usage.
   //
   // Default: 100
   uint64_t write_thread_max_yield_usec = 100;

   // The latency in microseconds after which a std::this_thread::yield
   // call (sched_yield on Linux) is considered to be a signal that
   // other processes or threads would like to use the current core.
   // Increasing this makes writer threads more likely to take CPU
   // by spinning, which will show up as an increase in the number of
   // involuntary context switches.
   //
   // Default: 3
   uint64_t write_thread_slow_yield_usec = 3;

   // If true, then DB::Open() will not update the statistics used to optimize
   // compaction decision by loading table properties from many files.
   // Turning off this feature will improve DBOpen time especially in
   // disk environment.
   //
   // Default: false
   bool skip_stats_update_on_db_open = false;

   // Recovery mode to control the consistency while replaying WAL
   // Default: kPointInTimeRecovery
   WALRecoveryMode wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;

   // If set to false then recovery will fail when a prepared
   // transaction is encountered in the WAL.
   // Default: false
   bool allow_2pc = false;

   // A global cache for table-level rows.
   // Default: nullptr (disabled)
   // Not supported in ROCKSDB_LITE mode!
   std::shared_ptr<Cache> row_cache = nullptr;

 #ifndef ROCKSDB_LITE
   // A filter object supplied to be invoked while processing write-ahead-logs
   // (WALs) during recovery. The filter provides a way to inspect log
   // records, ignoring a particular record or skipping replay.
   // The filter is invoked at startup and is invoked from a single-thread
   // currently.
   WalFilter* wal_filter = nullptr;
 #endif  // ROCKSDB_LITE

   // If true, then DB::Open / CreateColumnFamily / DropColumnFamily
   // / SetOptions will fail if options file is not detected or properly
   // persisted.
   //
   // DEFAULT: false
   bool fail_if_options_file_error = false;

   // If true, then print malloc stats together with rocksdb.stats
   // when printing to LOG.
   // DEFAULT: false
   bool dump_malloc_stats = false;

   // By default RocksDB replays WAL logs and flushes them on DB open, which
   // may create very small SST files. If this option is enabled, RocksDB will
   // try to avoid (but not guarantee not to) flush during recovery. Also,
   // existing WAL logs will be kept, so that if a crash happens before flush,
   // we still have logs to recover from.
   //
   // DEFAULT: false
   bool avoid_flush_during_recovery = false;

   // By default RocksDB will flush all memtables on DB close if there are
   // unpersisted data (i.e. with WAL disabled). The flush can be skipped to
   // speed up DB close. Unpersisted data WILL BE LOST.
   //
   // DEFAULT: false
   //
   // Dynamically changeable through SetDBOptions() API.
   bool avoid_flush_during_shutdown = false;

   // Set this option to true during creation of database if you want
   // to be able to ingest behind (call IngestExternalFile() skipping keys
   // that already exist, rather than overwriting matching keys).
   // Setting this option to true will affect 2 things:
   // 1) Disable some internal optimizations around SST file compression
   // 2) Reserve bottom-most level for ingested files only.
   // Note that num_levels should be >= 3 if this option is turned on.
   //
   // DEFAULT: false
   // Immutable.
   bool allow_ingest_behind = false;

   // If enabled it uses two queues for writes, one for the ones with
   // disable_memtable and one for the ones that also write to memtable. This
   // allows the memtable writes not to lag behind other writes. It can be used
   // to optimize MySQL 2PC in which only the commits, which are serial, write to
   // memtable.
   bool concurrent_prepare = false;

   // If true WAL is not flushed automatically after each write. Instead it
   // relies on manual invocation of FlushWAL to write the WAL buffer to its
   // file.
   bool manual_wal_flush = false;
 };

 // Options to control the behavior of a database (passed to DB::Open).
 // Combines the DB-wide settings (DBOptions) and the column family settings
 // (ColumnFamilyOptions) in one object by publicly inheriting from both.
 struct Options : public DBOptions, public ColumnFamilyOptions {
   // Create an Options object with default values for all fields.
   Options() : DBOptions(), ColumnFamilyOptions() {}

   // Create an Options object whose DBOptions part is copied from
   // "db_options" and whose ColumnFamilyOptions part is copied from
   // "column_family_options".
   Options(const DBOptions& db_options,
           const ColumnFamilyOptions& column_family_options)
       : DBOptions(db_options), ColumnFamilyOptions(column_family_options) {}

   // Restores the options to the defaults of an older RocksDB release.
   // Called without arguments, the target release is 4.6.
   // NOTE(review): presumably returns "this" to allow chaining, like the
   // helpers below -- confirm against the implementation.
   Options* OldDefaults(int rocksdb_major_version = 4,
                        int rocksdb_minor_version = 6);

   // NOTE(review): presumably writes all option values to "log" -- confirm.
   void Dump(Logger* log) const;

   // NOTE(review): presumably writes only the column family options to
   // "log" -- confirm.
   void DumpCFOptions(Logger* log) const;

   // Some functions that make it easier to optimize RocksDB

   // Set appropriate parameters for bulk loading.
   // The reason that this is a function that returns "this" instead of a
   // constructor is to enable chaining of multiple similar calls in the future.
   //
   // All data will be in level 0 without any automatic compaction.
   // It's recommended to manually call CompactRange(nullptr, nullptr) before
   // reading from the database, because otherwise the read can be very slow.
   Options* PrepareForBulkLoad();

   // Use this if your DB is very small (like under 1GB) and you don't want to
   // spend lots of memory for memtables.
   Options* OptimizeForSmallDb();
 };

 // A read request (issued via Get or an iterator) can name the deepest tier
 // of the storage hierarchy it is allowed to touch: only data that ALREADY
 // resides in that tier, or in a faster one, is processed. With
 // kBlockCacheTier, for example, a Get call is served from the memtable or
 // the block cache only; nothing is paged in from the OS cache or from
 // storage.
 enum ReadTier {
   // Data in memtable, block cache, OS cache or storage.
   kReadAllTier = 0,

   // Data in memtable or block cache.
   kBlockCacheTier = 1,

   // Persisted data. When WAL is disabled, this option will skip data in
   // memtable.
   // Note that this ReadTier currently only supports Get and MultiGet and
   // does not support iterators.
   kPersistedTier = 2,

   // Data in memtable. Used for memtable-only iterators.
   kMemtableTier = 3
 };

 // Options that control read operations.
 //
 // None of the members has an in-class initializer; the "Default:" values
 // documented below are expected to be applied by the constructors declared
 // at the end of the struct, whose definitions are not part of this header.
 struct ReadOptions {
   // If "snapshot" is non-nullptr, read as of the supplied snapshot
   // (which must belong to the DB that is being read and which must
   // not have been released).  If "snapshot" is nullptr, use an implicit
   // snapshot of the state at the beginning of this read operation.
   // Default: nullptr
   const Snapshot* snapshot;

   // "iterate_upper_bound" defines the extent up to which the forward iterator
   // can return entries. Once the bound is reached, Valid() will be false.
   // "iterate_upper_bound" is exclusive, i.e. the bound value is
   // not a valid entry.  If prefix_extractor is not null, the Seek target
   // and iterate_upper_bound need to have the same prefix.
   // This is because ordering is not guaranteed outside of prefix domain.
   // There is no lower bound on the iterator. If needed, that can be easily
   // implemented.
   //
   // Default: nullptr
   const Slice* iterate_upper_bound;

   // If non-zero, NewIterator will create a new table reader which
   // performs reads of the given size. Using a large size (> 2MB) can
   // improve the performance of forward iteration on spinning disks.
   // Default: 0
   size_t readahead_size;

   // A threshold for the number of keys that can be skipped before failing an
   // iterator seek as incomplete. The default value of 0 should be used to
   // never fail a request as incomplete, even on skipping too many keys.
   // Default: 0
   uint64_t max_skippable_internal_keys;

   // Specify if this read request should process data that ALREADY
   // resides on a particular cache. If the required data is not
   // found at the specified cache, then Status::Incomplete is returned.
   // Default: kReadAllTier
   ReadTier read_tier;

   // If true, all data read from underlying storage will be
   // verified against corresponding checksums.
   // Default: true
   bool verify_checksums;

   // Should the "data block"/"index block"/"filter block" read for this
   // iteration be cached in memory?
   // Callers may wish to set this field to false for bulk scans.
   // Default: true
   bool fill_cache;

   // Specify to create a tailing iterator -- a special iterator that has a
   // view of the complete database (i.e. it can also be used to read newly
   // added data) and is optimized for sequential reads. It will return records
   // that were inserted into the database after the creation of the iterator.
   // Default: false
   // Not supported in ROCKSDB_LITE mode!
   bool tailing;

   // Specify to create a managed iterator -- a special iterator that
   // uses less resources by having the ability to free its underlying
   // resources on request.
   // Default: false
   // Not supported in ROCKSDB_LITE mode!
   bool managed;

   // Enable a total order seek regardless of index format (e.g. hash index)
   // used in the table. Some table formats (e.g. plain table) may not support
   // this option.
   // If true when calling Get(), we also skip prefix bloom when reading from
   // block based table. It provides a way to read existing data after
   // changing implementation of prefix extractor.
   // NOTE(review): no default is documented for this field; it is set by the
   // out-of-line constructors -- confirm the value there.
   bool total_order_seek;

   // Enforce that the iterator only iterates over the same prefix as the seek.
   // This option is effective only for prefix seeks, i.e. prefix_extractor is
   // non-null for the column family and total_order_seek is false.  Unlike
   // iterate_upper_bound, prefix_same_as_start only works within a prefix
   // but in both directions.
   // Default: false
   bool prefix_same_as_start;

   // Keep the blocks loaded by the iterator pinned in memory as long as the
   // iterator is not deleted. If used when reading from tables created with
   // BlockBasedTableOptions::use_delta_encoding = false,
   // Iterator's property "rocksdb.iterator.is-key-pinned" is guaranteed to
   // return 1.
   // Default: false
   bool pin_data;

   // If true, when PurgeObsoleteFile is called in CleanupIteratorState, we
   // schedule a background job in the flush job queue and delete obsolete files
   // in background.
   // Default: false
   bool background_purge_on_iterator_cleanup;

   // If true, keys deleted using the DeleteRange() API will be visible to
   // readers until they are naturally deleted during compaction. This improves
   // read performance in DBs with many range deletions.
   // Default: false
   bool ignore_range_deletions;

   // Creates ReadOptions with default values.
   // NOTE(review): defined out of line; the "Default:" values documented
   // above are presumably applied there -- confirm.
   ReadOptions();
   // NOTE(review): judging by the parameter names, "cksum" and "cache"
   // presumably initialize verify_checksums and fill_cache -- confirm against
   // the definition.
   ReadOptions(bool cksum, bool cache);
 };

 // Options that control write operations.
 // Every flag starts out as false.
 struct WriteOptions {
   // When set, the write is flushed out of the operating system buffer
   // cache (through WritableFile::Sync()) before it is reported as complete.
   // Writes become slower with this flag on.
   //
   // When left off, a machine crash may lose some recent writes. A crash of
   // just the process (the machine does not reboot) loses no writes, even
   // with sync==false.
   //
   // Put differently: a DB write with sync==false has crash semantics
   // similar to the "write()" system call, while a DB write with sync==true
   // has crash semantics similar to "write()" followed by "fdatasync()".
   //
   // Default: false
   bool sync = false;

   // When set, the write skips the write ahead log, so it may get lost
   // after a crash.
   // Default: false
   bool disableWAL = false;

   // When set, a write that targets a column family that does not exist
   // (because it was dropped) is ignored instead of returning an error.
   // The other writes of the same WriteBatch still succeed.
   // Default: false
   bool ignore_missing_column_families = false;

   // When set, a write request that would have to wait or sleep fails
   // immediately with Status::Incomplete() instead.
   // Default: false
   bool no_slowdown = false;

   // When set, this write request gets lower priority if compaction is
   // behind. In that situation, if no_slowdown = true, the request is
   // cancelled immediately and Status::Incomplete() is returned; otherwise
   // the request is slowed down. RocksDB picks the amount of slowdown so
   // that the impact on high priority writes stays minimal.
   //
   // Default: false
   bool low_pri = false;

   // The defaults come from the member initializers above.
   WriteOptions() {}
 };

 // Options that control flush operations.
 struct FlushOptions {
   // When set, the flush call waits until the flush is done.
   // Default: true
   bool wait = true;

   // The default comes from the member initializer above.
   FlushOptions() {}
 };

 // Creates a Logger from the provided DBOptions.
 //
 // dbname:  name of the database the logger is created for.
 // options: DB-wide options the logger is built from.
 // logger:  output parameter for the logger.
 //          NOTE(review): presumably assigned only when the returned Status
 //          is OK -- confirm against the implementation.
 Status CreateLoggerFromOptions(const std::string& dbname,
                                const DBOptions& options,
                                std::shared_ptr<Logger>* logger);

 // Options for the CompactFiles() call.
 struct CompactionOptions {
   // Compression type applied to the compaction output.
   // Default: snappy
   CompressionType compression = kSnappyCompression;

   // Compaction creates output files of size `output_file_size_limit`.
   // Default: MAX, which means that compaction will create a single file
   uint64_t output_file_size_limit = std::numeric_limits<uint64_t>::max();

   // The defaults come from the member initializers above.
   CompactionOptions() {}
 };

 // Level based compaction can be told whether compaction of the bottommost
 // level is skipped, done conditionally, or forced.
 enum class BottommostLevelCompaction {
   // Skip bottommost level compaction.
   kSkip = 0,

   // Only compact bottommost level if there is a compaction filter.
   // This is the default option.
   kIfHaveCompactionFilter = 1,

   // Always compact bottommost level.
   kForce = 2,
 };

 // Options for the CompactRange() call.
 struct CompactRangeOptions {
   // When set, no other compaction runs at the same time as this manual
   // compaction.
   // Default: true
   bool exclusive_manual_compaction{true};

   // When set, compacted files are moved to the minimum level capable of
   // holding the data, or to the given level (a non-negative target_level).
   // Default: false
   bool change_level{false};

   // Used together with change_level == true: a non-negative value names the
   // level the compacted files are moved to.
   // Default: -1
   int target_level{-1};

   // Compaction outputs are placed in options.db_paths[target_path_id].
   // Behavior is undefined if target_path_id is out of range.
   // Default: 0
   uint32_t target_path_id{0};

   // Level based compaction by default compacts the bottommost level only if
   // there is a compaction filter.
   BottommostLevelCompaction bottommost_level_compaction{
       BottommostLevelCompaction::kIfHaveCompactionFilter};
 };

 // Options for the IngestExternalFile() call.
 struct IngestExternalFileOptions {
   // Set to true to move the files instead of copying them.
   // Default: false
   bool move_files{false};

   // When false, the keys of an ingested file could appear in existing
   // snapshots that were created before the file was ingested.
   // Default: true
   bool snapshot_consistency{true};

   // When false, IngestExternalFile() fails if the key range of the file
   // overlaps with existing keys or tombstones in the DB.
   // Default: true
   bool allow_global_seqno{true};

   // When false, IngestExternalFile() fails if the key range of the file
   // overlaps with the memtable key range (a memtable flush is required).
   // Default: true
   bool allow_blocking_flush{true};

   // Set to true to have duplicate keys in the ingested file skipped rather
   // than overwrite the existing data under that key.
   // Use case: back-fill of some historical data in the database without
   // overwriting an existing newer version of the data.
   // This option can only be used if the DB has been running with
   // allow_ingest_behind=true since the dawn of time.
   // All files will be ingested at the bottommost level with seqno=0.
   // Default: false
   bool ingest_behind{false};
 };

 }  // namespace rocksdb

 #endif  // STORAGE_ROCKSDB_INCLUDE_OPTIONS_H_