blob: 73eafd084921bb7ac729a8ce024dfcaf550096c1 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdlib>
#include <stdint.h>
#include <set>
#include <string>
#include <vector>
namespace kudu {
class Random;
// Writes exactly n random bytes to dest using the parameter Random generator.
// Note RandomString() does not null-terminate its strings, though '\0' could
// be written to dest with the same probability as any other byte.
void RandomString(void* dest, size_t n, Random* rng);
// Same as the above, but returns the string.
std::string RandomString(size_t n, Random* rng);
// Generate a 32-bit random seed from several sources, including timestamp,
// pid & tid.
uint32_t GetRandomSeed32();
// Returns a randomly-selected element from the container.
template <typename Container, typename T, typename Rand>
T SelectRandomElement(const Container& c, Rand* r) {
CHECK(!c.empty());
std::vector<T> rand_list;
ReservoirSample(c, 1, std::set<T>{}, r, &rand_list);
return rand_list[0];
}
// Returns a randomly-selected subset from the container. The subset will
// include at least 'min_to_return' results, but may contain more.
//
// The results are not stored in a randomized order: the order of results will
// match their order in the input collection.
template <typename Container, typename T, typename Rand>
std::vector<T> SelectRandomSubset(const Container& c, int min_to_return, Rand* r) {
CHECK_GE(c.size(), min_to_return);
int num_to_return = min_to_return + r->Uniform(1 + c.size() - min_to_return);
std::vector<T> rand_list;
ReservoirSample(c, num_to_return, std::set<T>{}, r, &rand_list);
return rand_list;
}
// Sample 'k' random elements from the collection 'c' into 'result', taking
// care not to sample any elements that are already present in 'avoid'.
//
// In the case that 'c' has fewer than 'k' elements then all elements in 'c'
// will be selected.
//
// 'c' should be an iterable STL collection such as a vector, set, or list.
// 'avoid' should be an STL-compatible set.
//
// The results are not stored in a randomized order: the order of results will
// match their order in the input collection.
template<class Collection, class Set, class T, typename Rand>
void ReservoirSample(const Collection& c, int k, const Set& avoid,
Rand* r, std::vector<T>* result) {
result->clear();
result->reserve(k);
int i = 0;
for (const T& elem : c) {
if (ContainsKey(avoid, elem)) {
continue;
}
i++;
// Fill the reservoir if there is available space.
if (result->size() < k) {
result->push_back(elem);
continue;
}
// Otherwise replace existing elements with decreasing probability.
int j = r->Uniform(i);
if (j < k) {
(*result)[j] = elem;
}
}
}
} // namespace kudu