blob: 1d4b0b6558b7233df9a2b6bbbae36fa654c9fc44 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ORC_STATISTICS_HH
#define ORC_STATISTICS_HH
#include "orc/orc-config.hh"
#include "orc/Type.hh"
#include "orc/Vector.hh"
namespace orc {
/**
* Statistics that are available for all types of columns.
* This is an abstract interface: every member function other than the
* destructor is pure virtual, and the type-specific statistics classes in
* this header derive from it.
*/
class ColumnStatistics {
public:
virtual ~ColumnStatistics();
/**
* Get the number of values in this column. It will differ from the number
* of rows because of NULL values.
* @return the number of values
*/
virtual uint64_t getNumberOfValues() const = 0;
/**
* Check whether column has null value.
* @return true if has null value
*/
virtual bool hasNull() const = 0;
/**
* Render the statistics of the column, if any, as text. Nothing is
* printed by this call itself; the text is handed back to the caller.
* @return the statistics as a string
*/
virtual std::string toString() const = 0;
};
/**
* Statistics for binary columns.
* Adds total-length information on top of the common ColumnStatistics
* interface.
*/
class BinaryColumnStatistics: public ColumnStatistics {
public:
virtual ~BinaryColumnStatistics();
/**
* Check whether column has total length.
* @return true if has total length
*/
virtual bool hasTotalLength() const = 0;
/**
* Get the total length of the values in the column.
* NOTE(review): presumably only meaningful when hasTotalLength() returns
* true, and presumably measured in bytes - confirm against the
* implementation.
* @return total length
*/
virtual uint64_t getTotalLength() const = 0;
};
/**
* Statistics for boolean columns.
* Adds true/false counts on top of the common ColumnStatistics interface.
*/
class BooleanColumnStatistics: public ColumnStatistics {
public:
virtual ~BooleanColumnStatistics();
/**
* Check whether column has true/false count.
* @return true if has true/false count
*/
virtual bool hasCount() const = 0;
/**
* Get the false count for the column.
* NOTE(review): presumably only meaningful when hasCount() returns true -
* confirm against the implementation.
* @return false count
*/
virtual uint64_t getFalseCount() const = 0;
/**
* Get the true count for the column.
* NOTE(review): presumably only meaningful when hasCount() returns true -
* confirm against the implementation.
* @return true count
*/
virtual uint64_t getTrueCount() const = 0;
};
/**
* Statistics for date columns.
* Minimum and maximum are exposed as int32_t values.
* NOTE(review): the unit/epoch of those values is not stated in this
* header - confirm against the implementation.
*/
class DateColumnStatistics: public ColumnStatistics {
public:
virtual ~DateColumnStatistics();
/**
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;
/**
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;
/**
* Get the minimum value for the column.
* NOTE(review): presumably only meaningful when hasMinimum() returns
* true - confirm.
* @return minimum value
*/
virtual int32_t getMinimum() const = 0;
/**
* Get the maximum value for the column.
* NOTE(review): presumably only meaningful when hasMaximum() returns
* true - confirm.
* @return maximum value
*/
virtual int32_t getMaximum() const = 0;
};
/**
* Statistics for decimal columns.
* Minimum, maximum and sum are returned by value as Decimal objects; the
* Decimal type is declared outside this header.
*/
class DecimalColumnStatistics: public ColumnStatistics {
public:
virtual ~DecimalColumnStatistics();
/**
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;
/**
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;
/**
* Check whether column has sum.
* @return true if has sum
*/
virtual bool hasSum() const = 0;
/**
* Get the minimum value for the column.
* NOTE(review): presumably only meaningful when hasMinimum() returns
* true - confirm.
* @return minimum value
*/
virtual Decimal getMinimum() const = 0;
/**
* Get the maximum value for the column.
* NOTE(review): presumably only meaningful when hasMaximum() returns
* true - confirm.
* @return maximum value
*/
virtual Decimal getMaximum() const = 0;
/**
* Get the sum for the column.
* NOTE(review): presumably only meaningful when hasSum() returns true -
* confirm.
* @return sum of all the values
*/
virtual Decimal getSum() const = 0;
};
/**
* Statistics for float and double columns.
* All values are reported as double regardless of the column's width.
*/
class DoubleColumnStatistics: public ColumnStatistics {
public:
virtual ~DoubleColumnStatistics();
/**
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;
/**
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;
/**
* Check whether column has sum.
* @return true if has sum
*/
virtual bool hasSum() const = 0;
/**
* Get the smallest value in the column. Only defined if getNumberOfValues
* is non-zero.
* @return the minimum
*/
virtual double getMinimum() const = 0;
/**
* Get the largest value in the column. Only defined if getNumberOfValues
* is non-zero.
* @return the maximum
*/
virtual double getMaximum() const = 0;
/**
* Get the sum of the values in the column.
* NOTE(review): presumably only meaningful when hasSum() returns true -
* confirm.
* @return the sum
*/
virtual double getSum() const = 0;
};
/**
* Statistics for all of the integer columns, such as byte, short, int, and
* long. All values are reported as int64_t regardless of the column's
* width.
*/
class IntegerColumnStatistics: public ColumnStatistics {
public:
virtual ~IntegerColumnStatistics();
/**
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;
/**
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;
/**
* Check whether column has sum.
* @return true if has sum
*/
virtual bool hasSum() const = 0;
/**
* Get the smallest value in the column. Only defined if getNumberOfValues
* is non-zero.
* @return the minimum
*/
virtual int64_t getMinimum() const = 0;
/**
* Get the largest value in the column. Only defined if getNumberOfValues
* is non-zero.
* @return the maximum
*/
virtual int64_t getMaximum() const = 0;
/**
* Get the sum of the column. Only valid if hasSum returns true.
* @return the sum of the column
*/
virtual int64_t getSum() const = 0;
};
/**
* Statistics for string columns.
* Minimum and maximum are returned as references to std::string rather
* than by value.
* NOTE(review): the referenced strings presumably live only as long as
* this statistics object - confirm before holding on to them.
*/
class StringColumnStatistics: public ColumnStatistics {
public:
virtual ~StringColumnStatistics();
/**
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;
/**
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;
/**
* Check whether column has total length.
* @return true if has total length
*/
virtual bool hasTotalLength() const = 0;
/**
* Get the minimum value for the column.
* NOTE(review): presumably only meaningful when hasMinimum() returns
* true - confirm.
* @return minimum value
*/
virtual const std::string & getMinimum() const = 0;
/**
* Get the maximum value for the column.
* NOTE(review): presumably only meaningful when hasMaximum() returns
* true - confirm.
* @return maximum value
*/
virtual const std::string & getMaximum() const = 0;
/**
* Get the total length of all values.
* NOTE(review): presumably only meaningful when hasTotalLength() returns
* true - confirm.
* @return total length of all the values
*/
virtual uint64_t getTotalLength() const = 0;
};
/**
* Statistics for timestamp columns.
* Exposes a minimum/maximum pair and a separate lowerBound/upperBound
* pair, all as int64_t values.
* NOTE(review): the unit/epoch of these values, and how the bounds relate
* to the minimum/maximum, are not stated in this header - confirm against
* the implementation.
*/
class TimestampColumnStatistics: public ColumnStatistics {
public:
virtual ~TimestampColumnStatistics();
/**
* Check whether column has minimum.
* @return true if has minimum
*/
virtual bool hasMinimum() const = 0;
/**
* Check whether column has maximum.
* @return true if has maximum
*/
virtual bool hasMaximum() const = 0;
/**
* Get the minimum value for the column.
* NOTE(review): presumably only meaningful when hasMinimum() returns
* true - confirm.
* @return minimum value
*/
virtual int64_t getMinimum() const = 0;
/**
* Get the maximum value for the column.
* NOTE(review): presumably only meaningful when hasMaximum() returns
* true - confirm.
* @return maximum value
*/
virtual int64_t getMaximum() const = 0;
/**
* Check whether column has a lowerBound.
* @return true if column has a lowerBound
*/
virtual bool hasLowerBound() const = 0;
/**
* Check whether column has an upperBound.
* @return true if column has an upperBound
*/
virtual bool hasUpperBound() const = 0;
/**
* Get the lowerBound value for the column.
* NOTE(review): presumably only meaningful when hasLowerBound() returns
* true - confirm.
* @return lowerBound value
*/
virtual int64_t getLowerBound() const = 0;
/**
* Get the upperBound value for the column.
* NOTE(review): presumably only meaningful when hasUpperBound() returns
* true - confirm.
* @return upperBound value
*/
virtual int64_t getUpperBound() const = 0;
};
/**
* A collection of per-column statistics, addressed by column id.
* This is an abstract interface: every member function other than the
* destructor is pure virtual.
*/
class Statistics {
public:
virtual ~Statistics();
/**
* Get the statistics of the given column.
* The result is a pointer to the common ColumnStatistics base type.
* NOTE(review): ownership of the returned object and the behaviour for
* an out-of-range colId are not stated in this header; presumably the
* pointer is non-owning and colId must be below getNumberOfColumns() -
* confirm against the implementation.
* @param colId id of the column
* @return one column's statistics
*/
virtual const ColumnStatistics* getColumnStatistics(uint32_t colId
) const = 0;
/**
* Get the number of columns.
* @return the number of columns
*/
virtual uint32_t getNumberOfColumns() const = 0;
};
/**
* Statistics that additionally expose per-RowIndex-entry statistics for
* each column, on top of the per-column access inherited from Statistics.
*/
class StripeStatistics : public Statistics {
public:
virtual ~StripeStatistics();
/**
* Get the statistics of a given RowIndex entry in a given column.
* NOTE(review): ownership of the returned object and the behaviour for
* out-of-range ids are not stated in this header; presumably the pointer
* is non-owning and rowIndexId must be below
* getNumberOfRowIndexStats(columnId) - confirm against the
* implementation.
* @param columnId id of the column
* @param rowIndexId RowIndex entry id
* @return statistics of the given RowIndex entry
*/
virtual const ColumnStatistics*
getRowIndexStatistics(
uint32_t columnId, uint32_t rowIndexId) const = 0;
/**
* Get the number of RowIndex statistics in a given column.
* @param columnId id of the column
* @return the number of RowIndex statistics
*/
virtual uint32_t getNumberOfRowIndexStats(uint32_t columnId) const = 0;
};
}
#endif