blob: df9523126e27e888e63986e71ed21004b54c510e [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#ifndef QUICKSTEP_CATALOG_CATALOG_RELATION_STATISTICS_HPP_
#define QUICKSTEP_CATALOG_CATALOG_RELATION_STATISTICS_HPP_
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <utility>
#include "catalog/Catalog.pb.h"
#include "catalog/CatalogTypedefs.hpp"
#include "types/LongType.hpp"
#include "types/NullType.hpp"
#include "types/TypedValue.hpp"
#include "utility/Macros.hpp"
namespace quickstep {
/** \addtogroup Catalog
* @{
*/
/**
* @brief Statistics of a catalog relation. E.g. total number of tuples,
* number of distinct values for each column.
**/
class CatalogRelationStatistics {
 public:
  /**
   * @brief Constructor. The statistics start out marked as not exact, the
   *        number of tuples statistic is initialized from kNullValue, and no
   *        per-column statistics are recorded.
   **/
  CatalogRelationStatistics()
      : is_exact_(false),
        num_tuples_(kNullValue) {}

  /**
   * @brief Reconstruct a CatalogRelationStatistics object from its serialized
   *        Protocol Buffer form.
   *
   * @param proto The Protocol Buffer serialization of a CatalogRelationStatistics
   *        object, previously produced by getProto().
   **/
  explicit CatalogRelationStatistics(const serialization::CatalogRelationStatistics &proto);

  /**
   * @brief Serialize the CatalogRelationStatistics object as Protocol Buffer.
   *
   * @return The Protocol Buffer representation of the CatalogRelationStatistics
   *         object.
   **/
  serialization::CatalogRelationStatistics getProto() const;

  /**
   * @brief Check whether the statistics are exact (i.e. up-to-date) for the
   *        relation.
   *
   * @return True if the statistics are exact for the relation, false otherwise.
   */
  bool isExact() const {
    return is_exact_;
  }

  /**
   * @brief Set the boolean flag that indicates whether the statistics are exact
   *        for the relation.
   *
   * @param is_exact Exactness of the statistics.
   */
  void setExactness(const bool is_exact) {
    is_exact_ = is_exact;
  }

  /**
   * @brief Check whether the number of tuples statistic is available.
   *
   * @return True if the number of tuples statistic is available, false otherwise.
   */
  bool hasNumTuples() const {
    return !num_tuples_.isNull();
  }

  /**
   * @brief Get the number of tuples statistic.
   *
   * @note The statistic must be available, i.e. hasNumTuples() must be true.
   *       This precondition is only checked with DCHECK.
   *
   * @return The number of tuples.
   */
  std::size_t getNumTuples() const {
    DCHECK(hasNumTuples());
    // The statistic is stored as a 64-bit signed literal; convert explicitly
    // to the unsigned type exposed by this interface.
    return static_cast<std::size_t>(num_tuples_.getLiteral<std::int64_t>());
  }

  /**
   * @brief Set the number of tuples statistic.
   *
   * @param num_tuples The number of tuples statistic.
   */
  void setNumTuples(const std::size_t num_tuples) {
    num_tuples_ = MakeLongValue(num_tuples);
  }

  /**
   * @brief Check whether the number of distinct values statistic is available
   *        for a column.
   *
   * @param attr_id The id of the column.
   * @return True if the number of distinct values statistic is available,
   *         false otherwise.
   */
  bool hasNumDistinctValues(const attribute_id attr_id) const {
    const ColumnStats *stats = getColumnStats(attr_id);
    return (stats != nullptr && !stats->num_distinct_values.isNull());
  }

  /**
   * @brief Get the number of distinct values statistic for a column.
   *
   * @note The statistic must be available, i.e. hasNumDistinctValues(attr_id)
   *       must be true. This precondition is only checked with DCHECK.
   *
   * @param attr_id The id of the column.
   * @return The number of distinct values statistic for the column.
   */
  std::size_t getNumDistinctValues(const attribute_id attr_id) const {
    DCHECK(hasNumDistinctValues(attr_id));
    // Stored as a 64-bit signed literal; convert explicitly to std::size_t.
    return static_cast<std::size_t>(
        column_stats_.at(attr_id).num_distinct_values.getLiteral<std::int64_t>());
  }

  /**
   * @brief Set the number of distinct values statistic for a column.
   *
   * @param attr_id The id of the column.
   * @param num_distinct_values The number of distinct values statistic.
   */
  void setNumDistinctValues(const attribute_id attr_id,
                            const std::size_t num_distinct_values) {
    // operator[] creates the per-column entry on first use.
    column_stats_[attr_id].num_distinct_values = MakeLongValue(num_distinct_values);
  }

  /**
   * @brief Check whether the minimum value statistic is available for a column.
   *
   * @param attr_id The id of the column.
   * @return True if the minimum value statistic is available, false otherwise.
   */
  bool hasMinValue(const attribute_id attr_id) const {
    const ColumnStats *stats = getColumnStats(attr_id);
    return (stats != nullptr && !stats->min_value.isNull());
  }

  /**
   * @brief Get the minimum value statistic for a column.
   *
   * @note The statistic must be available, i.e. hasMinValue(attr_id) must be
   *       true. This precondition is only checked with DCHECK.
   *
   * @param attr_id The id of the column.
   * @return The minimum value statistic for the column.
   */
  const TypedValue& getMinValue(const attribute_id attr_id) const {
    DCHECK(hasMinValue(attr_id));
    return column_stats_.at(attr_id).min_value;
  }

  /**
   * @brief Set the minimum value statistic for a column.
   *
   * @param attr_id The id of the column.
   * @param min_value The minimum value statistic for the column.
   */
  void setMinValue(const attribute_id attr_id,
                   const TypedValue &min_value) {
    // operator[] creates the per-column entry on first use.
    column_stats_[attr_id].min_value = min_value;
  }

  /**
   * @brief Check whether the maximum value statistic is available for a column.
   *
   * @param attr_id The id of the column.
   * @return True if the maximum value statistic is available, false otherwise.
   */
  bool hasMaxValue(const attribute_id attr_id) const {
    const ColumnStats *stats = getColumnStats(attr_id);
    return (stats != nullptr && !stats->max_value.isNull());
  }

  /**
   * @brief Get the maximum value statistic for a column.
   *
   * @note The statistic must be available, i.e. hasMaxValue(attr_id) must be
   *       true. This precondition is only checked with DCHECK.
   *
   * @param attr_id The id of the column.
   * @return The maximum value statistic for the column.
   */
  const TypedValue& getMaxValue(const attribute_id attr_id) const {
    DCHECK(hasMaxValue(attr_id));
    return column_stats_.at(attr_id).max_value;
  }

  /**
   * @brief Set the maximum value statistic for a column.
   *
   * @param attr_id The id of the column.
   * @param max_value The maximum value statistic for the column.
   */
  void setMaxValue(const attribute_id attr_id,
                   const TypedValue &max_value) {
    // operator[] creates the per-column entry on first use.
    column_stats_[attr_id].max_value = max_value;
  }

 private:
  // Per-column statistics. Every field starts out as a copy of kNullValue and
  // is reported as unavailable by the has*() methods while isNull() holds.
  struct ColumnStats {
    ColumnStats()
        : num_distinct_values(kNullValue),
          min_value(kNullValue),
          max_value(kNullValue) {
    }

    TypedValue num_distinct_values;
    TypedValue min_value;
    TypedValue max_value;
  };

  // Wraps an unsigned count in a TypedValue created by the non-nullable
  // LongType. The count is converted to std::int64_t first, so the pointer
  // handed to makeValue() refers to an object of the same type that this class
  // later reads back with getLiteral<std::int64_t>(), independent of
  // sizeof(std::size_t) on the target platform.
  static TypedValue MakeLongValue(const std::size_t count) {
    const std::int64_t long_count = static_cast<std::int64_t>(count);
    return LongType::InstanceNonNullable().makeValue(&long_count);
  }

  // Returns the statistics recorded for the column, or nullptr if nothing has
  // been recorded for it. Unlike operator[], this never inserts an entry.
  inline const ColumnStats* getColumnStats(const attribute_id attr_id) const {
    const auto it = column_stats_.find(attr_id);
    return (it == column_stats_.end() ? nullptr : &it->second);
  }

  // Initial value for statistics that have not been set (defined out of line).
  static const TypedValue kNullValue;

  // Whether the statistics are exact (i.e. up-to-date) for the relation.
  bool is_exact_;

  // Total number of tuples in the relation.
  TypedValue num_tuples_;

  // Statistics for each column.
  std::unordered_map<attribute_id, ColumnStats> column_stats_;

  DISALLOW_COPY_AND_ASSIGN(CatalogRelationStatistics);
};
/** @} */
} // namespace quickstep
#endif // QUICKSTEP_CATALOG_CATALOG_RELATION_STATISTICS_HPP_