blob: 4448023ea4e4b34f5a009e29d8b960e95bbd2ee7 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstdlib>
#include <set>
#include <string>
#include <type_traits>
#include <unordered_set>
#include <vector>
#include <glog/logging.h>
#include "kudu/gutil/map-util.h"
#include "kudu/util/int128_util.h"
#include "kudu/util/random.h"
namespace kudu {
// Writes exactly n random bytes to dest using the parameter Random generator.
// Note RandomString() does not null-terminate its strings, though '\0' could
// be written to dest with the same probability as any other byte.
void RandomString(void* dest, size_t n, Random* rng);
// Same as the above, but returns the string.
std::string RandomString(size_t n, Random* rng);
// Generate a 32-bit random seed from several sources, including timestamp,
// pid & tid.
uint32_t GetRandomSeed32();
// Returns a randomly-selected element from the container.
template <typename Container, typename T, typename Rand>
T SelectRandomElement(const Container& c, Rand* r) {
CHECK(!c.empty());
std::vector<T> rand_list;
ReservoirSample(c, 1, std::set<T>{}, r, &rand_list);
return rand_list[0];
}
// Returns a randomly-selected subset from the container. The subset will
// include at least 'min_to_return' results, but may contain more.
//
// The results are not stored in a randomized order: the order of results will
// match their order in the input collection.
template <typename Container, typename T, typename Rand>
std::vector<T> SelectRandomSubset(const Container& c, int min_to_return, Rand* r) {
CHECK_GE(c.size(), min_to_return);
int num_to_return = min_to_return + r->Uniform(1 + c.size() - min_to_return);
std::vector<T> rand_list;
ReservoirSample(c, num_to_return, std::set<T>{}, r, &rand_list);
return rand_list;
}
// Sample 'k' random elements from the collection 'c' into 'result', taking
// care not to sample any elements that are already present in 'avoid'.
//
// In the case that 'c' has fewer than 'k' elements then all elements in 'c'
// will be selected.
//
// 'c' should be an iterable STL collection such as a vector, set, or list.
// 'avoid' should be an STL-compatible set.
//
// The results are not stored in a randomized order: the order of results will
// match their order in the input collection.
template<class Collection, class Set, class T, typename Rand>
void ReservoirSample(const Collection& c, int k, const Set& avoid,
Rand* r, std::vector<T>* result) {
result->clear();
result->reserve(k);
int i = 0;
for (const T& elem : c) {
if (ContainsKey(avoid, elem)) {
continue;
}
i++;
// Fill the reservoir if there is available space.
if (result->size() < k) {
result->push_back(elem);
continue;
}
// Otherwise replace existing elements with decreasing probability.
int j = r->Uniform(i);
if (j < k) {
(*result)[j] = elem;
}
}
}
// Generates random and unique 32-bit or 64-bit integers that are not present in
// the "avoid" set.
// TODO(bankim): Add support for 128-bit integers by providing hash value for 128-bit data types.
template<typename IntType, class Set, class RNG>
std::unordered_set<IntType> CreateRandomUniqueIntegers(const int num_values,
const Set& avoid,
RNG* rand) {
static_assert(std::is_integral<IntType>::value && (sizeof(IntType) == 4 || sizeof(IntType) == 8),
"Only 32-bit or 64-bit integers generated");
std::unordered_set<IntType> result;
while (result.size() < num_values) {
const IntType val = GetNextRandom<IntType>(rand);
if (ContainsKey(avoid, val)) {
continue;
}
if (!ContainsKey(result, val)) {
result.insert(val);
}
}
return result;
}
// Generates random 32-bit, 64-bit, or 128-bit integers in the [min_val, max_val) range.
template<typename IntType, class RNG>
// When entire range is specified for signed IntType, result of max_val - min_val is -1
// which when converted to unsigned type correctly represents the max value for the
// same unsigned IntType. Variants of Uniform() in RNG work with unsigned types.
// Hence the ASAN suppression.
ATTRIBUTE_NO_SANITIZE_INTEGER
std::vector<IntType> CreateRandomIntegersInRange(const int num_values,
IntType min_val,
IntType max_val,
RNG* rand) {
// type_traits not defined for 128-bit int data types, hence not checking for integral value.
static_assert(sizeof(IntType) == 4 || sizeof(IntType) == 8 || sizeof(IntType) == 16,
"Only 32-bit, 64-bit, or 128-bit integers generated");
CHECK_LT(min_val, max_val);
std::vector<IntType> result(num_values);
for (auto& v : result) {
v = min_val + GetNextUniformRandom(max_val - min_val, rand);
}
return result;
}
} // namespace kudu