| // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
| // This source code is licensed under both the GPLv2 (found in the |
| // COPYING file in the root directory) and Apache 2.0 License |
| // (found in the LICENSE.Apache file in the root directory). |
| |
| #ifndef GFLAGS |
| #include <cstdio> |
| int main() { |
| fprintf(stderr, "Please install gflags to run rocksdb tools\n"); |
| return 1; |
| } |
| #else |
| |
| #include <gflags/gflags.h> |
| |
| #include "db/db_impl.h" |
| #include "db/dbformat.h" |
| #include "monitoring/histogram.h" |
| #include "rocksdb/db.h" |
| #include "rocksdb/slice_transform.h" |
| #include "rocksdb/table.h" |
| #include "table/block_based_table_factory.h" |
| #include "table/get_context.h" |
| #include "table/internal_iterator.h" |
| #include "table/plain_table_factory.h" |
| #include "table/table_builder.h" |
| #include "util/file_reader_writer.h" |
| #include "util/testharness.h" |
| #include "util/testutil.h" |
| |
| using GFLAGS::ParseCommandLineFlags; |
| using GFLAGS::SetUsageMessage; |
| |
| namespace rocksdb { |
| |
| namespace { |
| // Make a key that i determines the first 4 characters and j determines the |
| // last 4 characters. |
| static std::string MakeKey(int i, int j, bool through_db) { |
| char buf[100]; |
| snprintf(buf, sizeof(buf), "%04d__key___%04d", i, j); |
| if (through_db) { |
| return std::string(buf); |
| } |
| // If we directly query table, which operates on internal keys |
| // instead of user keys, we need to add 8 bytes of internal |
| // information (row type etc) to user key to make an internal |
| // key. |
| InternalKey key(std::string(buf), 0, ValueType::kTypeValue); |
| return key.Encode().ToString(); |
| } |
| |
| uint64_t Now(Env* env, bool measured_by_nanosecond) { |
| return measured_by_nanosecond ? env->NowNanos() : env->NowMicros(); |
| } |
| } // namespace |
| |
| // A very simple benchmark that. |
| // Create a table with roughly numKey1 * numKey2 keys, |
| // where there are numKey1 prefixes of the key, each has numKey2 number of |
| // distinguished key, differing in the suffix part. |
| // If if_query_empty_keys = false, query the existing keys numKey1 * numKey2 |
| // times randomly. |
| // If if_query_empty_keys = true, query numKey1 * numKey2 random empty keys. |
| // Print out the total time. |
| // If through_db=true, a full DB will be created and queries will be against |
| // it. Otherwise, operations will be directly through table level. |
| // |
| // If for_terator=true, instead of just query one key each time, it queries |
| // a range sharing the same prefix. |
| namespace { |
| void TableReaderBenchmark(Options& opts, EnvOptions& env_options, |
| ReadOptions& read_options, int num_keys1, |
| int num_keys2, int num_iter, int prefix_len, |
| bool if_query_empty_keys, bool for_iterator, |
| bool through_db, bool measured_by_nanosecond) { |
| rocksdb::InternalKeyComparator ikc(opts.comparator); |
| |
| std::string file_name = test::TmpDir() |
| + "/rocksdb_table_reader_benchmark"; |
| std::string dbname = test::TmpDir() + "/rocksdb_table_reader_bench_db"; |
| WriteOptions wo; |
| Env* env = Env::Default(); |
| TableBuilder* tb = nullptr; |
| DB* db = nullptr; |
| Status s; |
| const ImmutableCFOptions ioptions(opts); |
| unique_ptr<WritableFileWriter> file_writer; |
| if (!through_db) { |
| unique_ptr<WritableFile> file; |
| env->NewWritableFile(file_name, &file, env_options); |
| |
| std::vector<std::unique_ptr<IntTblPropCollectorFactory> > |
| int_tbl_prop_collector_factories; |
| |
| file_writer.reset(new WritableFileWriter(std::move(file), env_options)); |
| int unknown_level = -1; |
| tb = opts.table_factory->NewTableBuilder( |
| TableBuilderOptions(ioptions, ikc, &int_tbl_prop_collector_factories, |
| CompressionType::kNoCompression, |
| CompressionOptions(), |
| nullptr /* compression_dict */, |
| false /* skip_filters */, kDefaultColumnFamilyName, |
| unknown_level), |
| 0 /* column_family_id */, file_writer.get()); |
| } else { |
| s = DB::Open(opts, dbname, &db); |
| ASSERT_OK(s); |
| ASSERT_TRUE(db != nullptr); |
| } |
| // Populate slightly more than 1M keys |
| for (int i = 0; i < num_keys1; i++) { |
| for (int j = 0; j < num_keys2; j++) { |
| std::string key = MakeKey(i * 2, j, through_db); |
| if (!through_db) { |
| tb->Add(key, key); |
| } else { |
| db->Put(wo, key, key); |
| } |
| } |
| } |
| if (!through_db) { |
| tb->Finish(); |
| file_writer->Close(); |
| } else { |
| db->Flush(FlushOptions()); |
| } |
| |
| unique_ptr<TableReader> table_reader; |
| if (!through_db) { |
| unique_ptr<RandomAccessFile> raf; |
| s = env->NewRandomAccessFile(file_name, &raf, env_options); |
| if (!s.ok()) { |
| fprintf(stderr, "Create File Error: %s\n", s.ToString().c_str()); |
| exit(1); |
| } |
| uint64_t file_size; |
| env->GetFileSize(file_name, &file_size); |
| unique_ptr<RandomAccessFileReader> file_reader( |
| new RandomAccessFileReader(std::move(raf), file_name)); |
| s = opts.table_factory->NewTableReader( |
| TableReaderOptions(ioptions, env_options, ikc), std::move(file_reader), |
| file_size, &table_reader); |
| if (!s.ok()) { |
| fprintf(stderr, "Open Table Error: %s\n", s.ToString().c_str()); |
| exit(1); |
| } |
| } |
| |
| Random rnd(301); |
| std::string result; |
| HistogramImpl hist; |
| |
| for (int it = 0; it < num_iter; it++) { |
| for (int i = 0; i < num_keys1; i++) { |
| for (int j = 0; j < num_keys2; j++) { |
| int r1 = rnd.Uniform(num_keys1) * 2; |
| int r2 = rnd.Uniform(num_keys2); |
| if (if_query_empty_keys) { |
| r1++; |
| r2 = num_keys2 * 2 - r2; |
| } |
| |
| if (!for_iterator) { |
| // Query one existing key; |
| std::string key = MakeKey(r1, r2, through_db); |
| uint64_t start_time = Now(env, measured_by_nanosecond); |
| if (!through_db) { |
| PinnableSlice value; |
| MergeContext merge_context; |
| RangeDelAggregator range_del_agg(ikc, {} /* snapshots */); |
| GetContext get_context(ioptions.user_comparator, |
| ioptions.merge_operator, ioptions.info_log, |
| ioptions.statistics, GetContext::kNotFound, |
| Slice(key), &value, nullptr, &merge_context, |
| &range_del_agg, env); |
| s = table_reader->Get(read_options, key, &get_context); |
| } else { |
| s = db->Get(read_options, key, &result); |
| } |
| hist.Add(Now(env, measured_by_nanosecond) - start_time); |
| } else { |
| int r2_len; |
| if (if_query_empty_keys) { |
| r2_len = 0; |
| } else { |
| r2_len = rnd.Uniform(num_keys2) + 1; |
| if (r2_len + r2 > num_keys2) { |
| r2_len = num_keys2 - r2; |
| } |
| } |
| std::string start_key = MakeKey(r1, r2, through_db); |
| std::string end_key = MakeKey(r1, r2 + r2_len, through_db); |
| uint64_t total_time = 0; |
| uint64_t start_time = Now(env, measured_by_nanosecond); |
| Iterator* iter = nullptr; |
| InternalIterator* iiter = nullptr; |
| if (!through_db) { |
| iiter = table_reader->NewIterator(read_options); |
| } else { |
| iter = db->NewIterator(read_options); |
| } |
| int count = 0; |
| for (through_db ? iter->Seek(start_key) : iiter->Seek(start_key); |
| through_db ? iter->Valid() : iiter->Valid(); |
| through_db ? iter->Next() : iiter->Next()) { |
| if (if_query_empty_keys) { |
| break; |
| } |
| // verify key; |
| total_time += Now(env, measured_by_nanosecond) - start_time; |
| assert(Slice(MakeKey(r1, r2 + count, through_db)) == |
| (through_db ? iter->key() : iiter->key())); |
| start_time = Now(env, measured_by_nanosecond); |
| if (++count >= r2_len) { |
| break; |
| } |
| } |
| if (count != r2_len) { |
| fprintf( |
| stderr, "Iterator cannot iterate expected number of entries. " |
| "Expected %d but got %d\n", r2_len, count); |
| assert(false); |
| } |
| delete iter; |
| total_time += Now(env, measured_by_nanosecond) - start_time; |
| hist.Add(total_time); |
| } |
| } |
| } |
| } |
| |
| fprintf( |
| stderr, |
| "===================================================" |
| "====================================================\n" |
| "InMemoryTableSimpleBenchmark: %20s num_key1: %5d " |
| "num_key2: %5d %10s\n" |
| "===================================================" |
| "====================================================" |
| "\nHistogram (unit: %s): \n%s", |
| opts.table_factory->Name(), num_keys1, num_keys2, |
| for_iterator ? "iterator" : (if_query_empty_keys ? "empty" : "non_empty"), |
| measured_by_nanosecond ? "nanosecond" : "microsecond", |
| hist.ToString().c_str()); |
| if (!through_db) { |
| env->DeleteFile(file_name); |
| } else { |
| delete db; |
| db = nullptr; |
| DestroyDB(dbname, opts); |
| } |
| } |
| } // namespace |
| } // namespace rocksdb |
| |
| DEFINE_bool(query_empty, false, "query non-existing keys instead of existing " |
| "ones."); |
| DEFINE_int32(num_keys1, 4096, "number of distinguish prefix of keys"); |
| DEFINE_int32(num_keys2, 512, "number of distinguish keys for each prefix"); |
| DEFINE_int32(iter, 3, "query non-existing keys instead of existing ones"); |
| DEFINE_int32(prefix_len, 16, "Prefix length used for iterators and indexes"); |
| DEFINE_bool(iterator, false, "For test iterator"); |
| DEFINE_bool(through_db, false, "If enable, a DB instance will be created and " |
| "the query will be against DB. Otherwise, will be directly against " |
| "a table reader."); |
| DEFINE_bool(mmap_read, true, "Whether use mmap read"); |
| DEFINE_string(table_factory, "block_based", |
| "Table factory to use: `block_based` (default), `plain_table` or " |
| "`cuckoo_hash`."); |
| DEFINE_string(time_unit, "microsecond", |
| "The time unit used for measuring performance. User can specify " |
| "`microsecond` (default) or `nanosecond`"); |
| |
| int main(int argc, char** argv) { |
| SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) + |
| " [OPTIONS]..."); |
| ParseCommandLineFlags(&argc, &argv, true); |
| |
| std::shared_ptr<rocksdb::TableFactory> tf; |
| rocksdb::Options options; |
| if (FLAGS_prefix_len < 16) { |
| options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform( |
| FLAGS_prefix_len)); |
| } |
| rocksdb::ReadOptions ro; |
| rocksdb::EnvOptions env_options; |
| options.create_if_missing = true; |
| options.compression = rocksdb::CompressionType::kNoCompression; |
| |
| if (FLAGS_table_factory == "cuckoo_hash") { |
| #ifndef ROCKSDB_LITE |
| options.allow_mmap_reads = FLAGS_mmap_read; |
| env_options.use_mmap_reads = FLAGS_mmap_read; |
| rocksdb::CuckooTableOptions table_options; |
| table_options.hash_table_ratio = 0.75; |
| tf.reset(rocksdb::NewCuckooTableFactory(table_options)); |
| #else |
| fprintf(stderr, "Plain table is not supported in lite mode\n"); |
| exit(1); |
| #endif // ROCKSDB_LITE |
| } else if (FLAGS_table_factory == "plain_table") { |
| #ifndef ROCKSDB_LITE |
| options.allow_mmap_reads = FLAGS_mmap_read; |
| env_options.use_mmap_reads = FLAGS_mmap_read; |
| |
| rocksdb::PlainTableOptions plain_table_options; |
| plain_table_options.user_key_len = 16; |
| plain_table_options.bloom_bits_per_key = (FLAGS_prefix_len == 16) ? 0 : 8; |
| plain_table_options.hash_table_ratio = 0.75; |
| |
| tf.reset(new rocksdb::PlainTableFactory(plain_table_options)); |
| options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform( |
| FLAGS_prefix_len)); |
| #else |
| fprintf(stderr, "Cuckoo table is not supported in lite mode\n"); |
| exit(1); |
| #endif // ROCKSDB_LITE |
| } else if (FLAGS_table_factory == "block_based") { |
| tf.reset(new rocksdb::BlockBasedTableFactory()); |
| } else { |
| fprintf(stderr, "Invalid table type %s\n", FLAGS_table_factory.c_str()); |
| } |
| |
| if (tf) { |
| // if user provides invalid options, just fall back to microsecond. |
| bool measured_by_nanosecond = FLAGS_time_unit == "nanosecond"; |
| |
| options.table_factory = tf; |
| rocksdb::TableReaderBenchmark(options, env_options, ro, FLAGS_num_keys1, |
| FLAGS_num_keys2, FLAGS_iter, FLAGS_prefix_len, |
| FLAGS_query_empty, FLAGS_iterator, |
| FLAGS_through_db, measured_by_nanosecond); |
| } else { |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| #endif // GFLAGS |