blob: 8d17f66fa892a61eb496c79099054a241f0744e8 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef KUDU_COMMON_ID_MAPPING_H
#define KUDU_COMMON_ID_MAPPING_H
#include <cstddef>
#include <cstdint>
#include <ostream>
#include <utility>
#include <vector>
#include <glog/logging.h>
namespace kudu {
// Light-weight hashtable implementation for mapping a small number of
// integers to other integers.
// This is used by Schema to map from Column ID to column index.
//
// The implementation is an open-addressed hash table with linear probing.
// The probing is limited to look only in the initial position and a single
// position following. If neither position is free, the hashtable is doubled.
// Therefore, the fill rate of the hashtable could be fairly bad, in the worst
// case. However, in practice, we expect that most tables will have nearly
// sequential column IDs (with only the occasional gap if a column has been removed).
// Therefore, we expect to have very few collisions.
//
// The implementation takes care to only use power-of-2 sized bucket arrays so that
// modulo can be calculated using bit masking. This improves performance substantially
// since the 'div' instruction can take many cycles.
//
// NOTE: this map restricts that keys and values are positive. '-1' is used
// as a special identifier indicating that the slot is unused or that the key
// was not found.
class IdMapping {
private:
enum {
kInitialCapacity = 64,
kNumProbes = 2
};
typedef std::pair<int, int> value_type;
public:
static const int kNoEntry;
IdMapping() :
mask_(kInitialCapacity - 1),
entries_(kInitialCapacity) {
clear();
}
void clear() {
ClearMap(&entries_);
}
// NOLINT on this function definition because it thinks we're calling
// std::swap instead of defining it.
void swap(IdMapping& other) { // NOLINT(*)
std::swap(mask_, other.mask_);
entries_.swap(other.entries_);
}
int operator[](int key) const {
return get(key);
}
int get(int key) const {
DCHECK_GE(key, 0);
for (int i = 0; i < kNumProbes; i++) {
int s = slot(key + i);
if (entries_[s].first == key || entries_[s].first == kNoEntry) {
return entries_[s].second;
}
}
return kNoEntry;
}
void set(int key, int val) {
DCHECK_GE(key, 0);
DCHECK_GE(val, 0);
while (true) {
for (int i = 0; i < kNumProbes; i++) {
int s = slot(key + i);
CHECK_NE(entries_[s].first, key) << "Cannot insert duplicate keys";
if (entries_[s].first == kNoEntry) {
entries_[s].first = key;
entries_[s].second = val;
return;
}
}
// Didn't find a spot.
DoubleCapacity();
}
}
int capacity() const {
return mask_ + 1;
}
// Returns the memory usage of this object without the object itself. Should
// be used when embedded inside another object.
size_t memory_footprint_excluding_this() const;
// Returns the memory usage of this object including the object itself.
// Should be used when allocated on the heap.
size_t memory_footprint_including_this() const;
private:
int slot(int key) const {
return key & mask_;
}
void DoubleCapacity() {
int new_capacity = capacity() * 2;
DCHECK_GE(new_capacity, 0);
std::vector<value_type> entries(new_capacity);
ClearMap(&entries);
mask_ = new_capacity - 1;
DCHECK_EQ(mask_ & new_capacity, 0);
entries.swap(entries_);
for (const auto& entry : entries) {
if (entry.first != kNoEntry) {
set(entry.first, entry.second);
}
}
}
static void ClearMap(std::vector<value_type>* v) {
for (auto& entry : *v) {
entry = std::make_pair(kNoEntry, kNoEntry);
}
}
uint64_t mask_;
std::vector<value_type> entries_;
};
} // namespace kudu
#endif /* KUDU_COMMON_ID_MAPPING_H */