blob: ad58bdd214c612e1b8989ef508be7c889397980e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.orc.impl;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;
import org.apache.orc.BinaryColumnStatistics;
import org.apache.orc.BooleanColumnStatistics;
import org.apache.orc.CollectionColumnStatistics;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.DateColumnStatistics;
import org.apache.orc.DecimalColumnStatistics;
import org.apache.orc.DoubleColumnStatistics;
import org.apache.orc.IntegerColumnStatistics;
import org.apache.orc.OrcProto;
import org.apache.orc.StringColumnStatistics;
import org.apache.orc.TimestampColumnStatistics;
import org.apache.orc.TypeDescription;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.TimeZone;
public class ColumnStatisticsImpl implements ColumnStatistics {
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof ColumnStatisticsImpl)) {
return false;
}
ColumnStatisticsImpl that = (ColumnStatisticsImpl) o;
if (count != that.count) {
return false;
}
if (hasNull != that.hasNull) {
return false;
}
if (bytesOnDisk != that.bytesOnDisk) {
return false;
}
return true;
}
@Override
public int hashCode() {
int result = (int) (count ^ (count >>> 32));
result = 31 * result + (hasNull ? 1 : 0);
return result;
}
private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
implements BooleanColumnStatistics {
private long trueCount = 0;
BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
super(stats);
OrcProto.BucketStatistics bkt = stats.getBucketStatistics();
trueCount = bkt.getCount(0);
}
BooleanStatisticsImpl() {
}
@Override
public void reset() {
super.reset();
trueCount = 0;
}
@Override
public void updateBoolean(boolean value, int repetitions) {
if (value) {
trueCount += repetitions;
}
}
@Override
public void merge(ColumnStatisticsImpl other) {
if (other instanceof BooleanStatisticsImpl) {
BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
trueCount += bkt.trueCount;
} else {
if (isStatsExists() && trueCount != 0) {
throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
}
}
super.merge(other);
}
@Override
public OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder builder = super.serialize();
OrcProto.BucketStatistics.Builder bucket =
OrcProto.BucketStatistics.newBuilder();
bucket.addCount(trueCount);
builder.setBucketStatistics(bucket);
return builder;
}
@Override
public long getFalseCount() {
return getNumberOfValues() - trueCount;
}
@Override
public long getTrueCount() {
return trueCount;
}
@Override
public String toString() {
return super.toString() + " true: " + trueCount;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof BooleanStatisticsImpl)) {
return false;
}
if (!super.equals(o)) {
return false;
}
BooleanStatisticsImpl that = (BooleanStatisticsImpl) o;
if (trueCount != that.trueCount) {
return false;
}
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (int) (trueCount ^ (trueCount >>> 32));
return result;
}
}
/**
* Column statistics for List and Map types.
*/
private static final class CollectionColumnStatisticsImpl extends ColumnStatisticsImpl
implements CollectionColumnStatistics {
protected long minimum = Long.MAX_VALUE;
protected long maximum = 0;
protected long sum = 0;
CollectionColumnStatisticsImpl() {
super();
}
CollectionColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
super(stats);
OrcProto.CollectionStatistics collStat = stats.getCollectionStatistics();
minimum = collStat.hasMinChildren() ? collStat.getMinChildren() : Long.MAX_VALUE;
maximum = collStat.hasMaxChildren() ? collStat.getMaxChildren() : 0;
sum = collStat.hasTotalChildren() ? collStat.getTotalChildren() : 0;
}
@Override
public void updateCollectionLength(final long length) {
/*
* Here, minimum = minCollectionLength
* maximum = maxCollectionLength
* sum = childCount
*/
if (length < minimum) {
minimum = length;
}
if (length > maximum) {
maximum = length;
}
this.sum += length;
}
@Override
public void reset() {
super.reset();
minimum = Long.MAX_VALUE;
maximum = 0;
sum = 0;
}
@Override
public void merge(ColumnStatisticsImpl other) {
if (other instanceof CollectionColumnStatisticsImpl) {
CollectionColumnStatisticsImpl otherColl = (CollectionColumnStatisticsImpl) other;
if(count == 0) {
minimum = otherColl.minimum;
maximum = otherColl.maximum;
} else {
if (otherColl.minimum < minimum) {
minimum = otherColl.minimum;
}
if (otherColl.maximum > maximum) {
maximum = otherColl.maximum;
}
}
sum += otherColl.sum;
} else {
if (isStatsExists()) {
throw new IllegalArgumentException("Incompatible merging of collection column statistics");
}
}
super.merge(other);
}
@Override
public long getMinimumChildren() {
return minimum;
}
@Override
public long getMaximumChildren() {
return maximum;
}
@Override
public long getTotalChildren() {
return sum;
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (count != 0) {
buf.append(" minChildren: ");
buf.append(minimum);
buf.append(" maxChildren: ");
buf.append(maximum);
if (sum != 0) {
buf.append(" totalChildren: ");
buf.append(sum);
}
}
return buf.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof CollectionColumnStatisticsImpl)) {
return false;
}
if (!super.equals(o)) {
return false;
}
CollectionColumnStatisticsImpl that = (CollectionColumnStatisticsImpl) o;
if (minimum != that.minimum) {
return false;
}
if (maximum != that.maximum) {
return false;
}
if (sum != that.sum) {
return false;
}
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (count != 0 ? (int) (minimum ^ (minimum >>> 32)): 0) ;
result = 31 * result + (count != 0 ? (int) (maximum ^ (maximum >>> 32)): 0);
result = 31 * result + (sum != 0 ? (int) (sum ^ (sum >>> 32)): 0);
return result;
}
@Override
public OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder builder = super.serialize();
OrcProto.CollectionStatistics.Builder collectionStats =
OrcProto.CollectionStatistics.newBuilder();
if (count != 0) {
collectionStats.setMinChildren(minimum);
collectionStats.setMaxChildren(maximum);
}
if (sum != 0) {
collectionStats.setTotalChildren(sum);
}
builder.setCollectionStatistics(collectionStats);
return builder;
}
}
/**
* Implementation of IntegerColumnStatistics
*/
private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
implements IntegerColumnStatistics {
private long minimum = Long.MAX_VALUE;
private long maximum = Long.MIN_VALUE;
private long sum = 0;
private boolean hasMinimum = false;
private boolean overflow = false;
IntegerStatisticsImpl() {
}
IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
super(stats);
OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
if (intStat.hasMinimum()) {
hasMinimum = true;
minimum = intStat.getMinimum();
}
if (intStat.hasMaximum()) {
maximum = intStat.getMaximum();
}
if (intStat.hasSum()) {
sum = intStat.getSum();
} else {
overflow = true;
}
}
@Override
public void reset() {
super.reset();
hasMinimum = false;
minimum = Long.MAX_VALUE;
maximum = Long.MIN_VALUE;
sum = 0;
overflow = false;
}
@Override
public void updateInteger(long value, int repetitions) {
if (!hasMinimum) {
hasMinimum = true;
minimum = value;
maximum = value;
} else if (value < minimum) {
minimum = value;
} else if (value > maximum) {
maximum = value;
}
if (!overflow) {
boolean wasPositive = sum >= 0;
sum += value * repetitions;
if ((value >= 0) == wasPositive) {
overflow = (sum >= 0) != wasPositive;
}
}
}
@Override
public void merge(ColumnStatisticsImpl other) {
if (other instanceof IntegerStatisticsImpl) {
IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
if (!hasMinimum) {
hasMinimum = otherInt.hasMinimum;
minimum = otherInt.minimum;
maximum = otherInt.maximum;
} else if (otherInt.hasMinimum) {
if (otherInt.minimum < minimum) {
minimum = otherInt.minimum;
}
if (otherInt.maximum > maximum) {
maximum = otherInt.maximum;
}
}
overflow |= otherInt.overflow;
if (!overflow) {
boolean wasPositive = sum >= 0;
sum += otherInt.sum;
if ((otherInt.sum >= 0) == wasPositive) {
overflow = (sum >= 0) != wasPositive;
}
}
} else {
if (isStatsExists() && hasMinimum) {
throw new IllegalArgumentException("Incompatible merging of integer column statistics");
}
}
super.merge(other);
}
@Override
public OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder builder = super.serialize();
OrcProto.IntegerStatistics.Builder intb =
OrcProto.IntegerStatistics.newBuilder();
if (hasMinimum) {
intb.setMinimum(minimum);
intb.setMaximum(maximum);
}
if (!overflow) {
intb.setSum(sum);
}
builder.setIntStatistics(intb);
return builder;
}
@Override
public long getMinimum() {
return minimum;
}
@Override
public long getMaximum() {
return maximum;
}
@Override
public boolean isSumDefined() {
return !overflow;
}
@Override
public long getSum() {
return sum;
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (hasMinimum) {
buf.append(" min: ");
buf.append(minimum);
buf.append(" max: ");
buf.append(maximum);
}
if (!overflow) {
buf.append(" sum: ");
buf.append(sum);
}
return buf.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof IntegerStatisticsImpl)) {
return false;
}
if (!super.equals(o)) {
return false;
}
IntegerStatisticsImpl that = (IntegerStatisticsImpl) o;
if (minimum != that.minimum) {
return false;
}
if (maximum != that.maximum) {
return false;
}
if (sum != that.sum) {
return false;
}
if (hasMinimum != that.hasMinimum) {
return false;
}
if (overflow != that.overflow) {
return false;
}
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (int) (minimum ^ (minimum >>> 32));
result = 31 * result + (int) (maximum ^ (maximum >>> 32));
result = 31 * result + (int) (sum ^ (sum >>> 32));
result = 31 * result + (hasMinimum ? 1 : 0);
result = 31 * result + (overflow ? 1 : 0);
return result;
}
}
private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
implements DoubleColumnStatistics {
private boolean hasMinimum = false;
private double minimum = Double.MAX_VALUE;
private double maximum = Double.MIN_VALUE;
private double sum = 0;
DoubleStatisticsImpl() {
}
DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
super(stats);
OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
if (dbl.hasMinimum()) {
hasMinimum = true;
minimum = dbl.getMinimum();
}
if (dbl.hasMaximum()) {
maximum = dbl.getMaximum();
}
if (dbl.hasSum()) {
sum = dbl.getSum();
}
}
@Override
public void reset() {
super.reset();
hasMinimum = false;
minimum = Double.MAX_VALUE;
maximum = Double.MIN_VALUE;
sum = 0;
}
@Override
public void updateDouble(double value) {
if (!hasMinimum) {
hasMinimum = true;
minimum = value;
maximum = value;
} else if (value < minimum) {
minimum = value;
} else if (value > maximum) {
maximum = value;
}
sum += value;
}
@Override
public void merge(ColumnStatisticsImpl other) {
if (other instanceof DoubleStatisticsImpl) {
DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
if (!hasMinimum) {
hasMinimum = dbl.hasMinimum;
minimum = dbl.minimum;
maximum = dbl.maximum;
} else if (dbl.hasMinimum) {
if (dbl.minimum < minimum) {
minimum = dbl.minimum;
}
if (dbl.maximum > maximum) {
maximum = dbl.maximum;
}
}
sum += dbl.sum;
} else {
if (isStatsExists() && hasMinimum) {
throw new IllegalArgumentException("Incompatible merging of double column statistics");
}
}
super.merge(other);
}
@Override
public OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder builder = super.serialize();
OrcProto.DoubleStatistics.Builder dbl =
OrcProto.DoubleStatistics.newBuilder();
if (hasMinimum) {
dbl.setMinimum(minimum);
dbl.setMaximum(maximum);
}
dbl.setSum(sum);
builder.setDoubleStatistics(dbl);
return builder;
}
@Override
public double getMinimum() {
return minimum;
}
@Override
public double getMaximum() {
return maximum;
}
@Override
public double getSum() {
return sum;
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (hasMinimum) {
buf.append(" min: ");
buf.append(minimum);
buf.append(" max: ");
buf.append(maximum);
}
buf.append(" sum: ");
buf.append(sum);
return buf.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof DoubleStatisticsImpl)) {
return false;
}
if (!super.equals(o)) {
return false;
}
DoubleStatisticsImpl that = (DoubleStatisticsImpl) o;
if (hasMinimum != that.hasMinimum) {
return false;
}
if (Double.compare(that.minimum, minimum) != 0) {
return false;
}
if (Double.compare(that.maximum, maximum) != 0) {
return false;
}
if (Double.compare(that.sum, sum) != 0) {
return false;
}
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
long temp;
result = 31 * result + (hasMinimum ? 1 : 0);
temp = Double.doubleToLongBits(minimum);
result = 31 * result + (int) (temp ^ (temp >>> 32));
temp = Double.doubleToLongBits(maximum);
result = 31 * result + (int) (temp ^ (temp >>> 32));
temp = Double.doubleToLongBits(sum);
result = 31 * result + (int) (temp ^ (temp >>> 32));
return result;
}
}
protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
implements StringColumnStatistics {
public static final int MAX_BYTES_RECORDED = 1024;
private Text minimum = null;
private Text maximum = null;
private long sum = 0;
private boolean isLowerBoundSet = false;
private boolean isUpperBoundSet = false;
StringStatisticsImpl() {
}
StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
super(stats);
OrcProto.StringStatistics str = stats.getStringStatistics();
if (str.hasMaximum()) {
maximum = new Text(str.getMaximum());
} else if (str.hasUpperBound()) {
maximum = new Text(str.getUpperBound());
isUpperBoundSet = true;
}
if (str.hasMinimum()) {
minimum = new Text(str.getMinimum());
} else if (str.hasLowerBound()) {
minimum = new Text(str.getLowerBound());
isLowerBoundSet = true;
}
if(str.hasSum()) {
sum = str.getSum();
}
}
@Override
public void reset() {
super.reset();
minimum = null;
maximum = null;
isLowerBoundSet = false;
isUpperBoundSet = false;
sum = 0;
}
@Override
public void updateString(Text value) {
updateString(value.getBytes(), 0, value.getLength(), 1);
}
@Override
public void updateString(byte[] bytes, int offset, int length,
int repetitions) {
if (minimum == null) {
if(length > MAX_BYTES_RECORDED) {
minimum = truncateLowerBound(bytes, offset);
maximum = truncateUpperBound(bytes, offset);
isLowerBoundSet = true;
isUpperBoundSet = true;
} else {
maximum = minimum = new Text();
maximum.set(bytes, offset, length);
isLowerBoundSet = false;
isUpperBoundSet = false;
}
} else if (WritableComparator.compareBytes(minimum.getBytes(), 0,
minimum.getLength(), bytes, offset, length) > 0) {
if(length > MAX_BYTES_RECORDED) {
minimum = truncateLowerBound(bytes, offset);
isLowerBoundSet = true;
} else {
minimum = new Text();
minimum.set(bytes, offset, length);
isLowerBoundSet = false;
}
} else if (WritableComparator.compareBytes(maximum.getBytes(), 0,
maximum.getLength(), bytes, offset, length) < 0) {
if(length > MAX_BYTES_RECORDED) {
maximum = truncateUpperBound(bytes, offset);
isUpperBoundSet = true;
} else {
maximum = new Text();
maximum.set(bytes, offset, length);
isUpperBoundSet = false;
}
}
sum += (long)length * repetitions;
}
@Override
public void merge(ColumnStatisticsImpl other) {
if (other instanceof StringStatisticsImpl) {
StringStatisticsImpl str = (StringStatisticsImpl) other;
if (count == 0) {
if (str.count != 0) {
minimum = new Text(str.minimum);
isLowerBoundSet = str.isLowerBoundSet;
maximum = new Text(str.maximum);
isUpperBoundSet = str.isUpperBoundSet;
} else {
/* both are empty */
maximum = minimum = null;
isLowerBoundSet = false;
isUpperBoundSet = false;
}
} else if (str.count != 0) {
if (minimum.compareTo(str.minimum) > 0) {
minimum = new Text(str.minimum);
isLowerBoundSet = str.isLowerBoundSet;
}
if (maximum.compareTo(str.maximum) < 0) {
maximum = new Text(str.maximum);
isUpperBoundSet = str.isUpperBoundSet;
}
}
sum += str.sum;
} else {
if (isStatsExists()) {
throw new IllegalArgumentException("Incompatible merging of string column statistics");
}
}
super.merge(other);
}
@Override
public OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder result = super.serialize();
OrcProto.StringStatistics.Builder str =
OrcProto.StringStatistics.newBuilder();
if (getNumberOfValues() != 0) {
if (isLowerBoundSet) {
str.setLowerBound(minimum.toString());
} else {
str.setMinimum(minimum.toString());
}
if (isUpperBoundSet) {
str.setUpperBound(maximum.toString());
} else {
str.setMaximum(maximum.toString());
}
str.setSum(sum);
}
result.setStringStatistics(str);
return result;
}
@Override
public String getMinimum() {
/* if we have lower bound set (in case of truncation)
getMinimum will be null */
if(isLowerBoundSet) {
return null;
} else {
return minimum == null ? null : minimum.toString();
}
}
@Override
public String getMaximum() {
/* if we have upper bound set (in case of truncation)
getMaximum will be null */
if(isUpperBoundSet) {
return null;
} else {
return maximum == null ? null : maximum.toString();
}
}
/**
* Get the string with
* length = Min(StringStatisticsImpl.MAX_BYTES_RECORDED, getMinimum())
*
* @return lower bound
*/
@Override
public String getLowerBound() {
return minimum == null ? null : minimum.toString();
}
/**
* Get the string with
* length = Min(StringStatisticsImpl.MAX_BYTES_RECORDED, getMaximum())
*
* @return upper bound
*/
@Override
public String getUpperBound() {
return maximum == null ? null : maximum.toString();
}
@Override
public long getSum() {
return sum;
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (minimum != null) {
if (isLowerBoundSet) {
buf.append(" lower: ");
} else {
buf.append(" min: ");
}
buf.append(getLowerBound());
if (isUpperBoundSet) {
buf.append(" upper: ");
} else {
buf.append(" max: ");
}
buf.append(getUpperBound());
buf.append(" sum: ");
buf.append(sum);
}
return buf.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof StringStatisticsImpl)) {
return false;
}
if (!super.equals(o)) {
return false;
}
StringStatisticsImpl that = (StringStatisticsImpl) o;
if (sum != that.sum) {
return false;
}
if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
return false;
}
if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
return false;
}
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
result = 31 * result + (int) (sum ^ (sum >>> 32));
return result;
}
private static void appendCodePoint(Text result, int codepoint) {
if (codepoint < 0 || codepoint > 0x1f_ffff) {
throw new IllegalArgumentException("Codepoint out of range " +
codepoint);
}
byte[] buffer = new byte[4];
if (codepoint < 0x7f) {
buffer[0] = (byte) codepoint;
result.append(buffer, 0, 1);
} else if (codepoint <= 0x7ff) {
buffer[0] = (byte) (0xc0 | (codepoint >> 6));
buffer[1] = (byte) (0x80 | (codepoint & 0x3f));
result.append(buffer, 0 , 2);
} else if (codepoint < 0xffff) {
buffer[0] = (byte) (0xe0 | (codepoint >> 12));
buffer[1] = (byte) (0x80 | ((codepoint >> 6) & 0x3f));
buffer[2] = (byte) (0x80 | (codepoint & 0x3f));
result.append(buffer, 0, 3);
} else {
buffer[0] = (byte) (0xf0 | (codepoint >> 18));
buffer[1] = (byte) (0x80 | ((codepoint >> 12) & 0x3f));
buffer[2] = (byte) (0x80 | ((codepoint >> 6) & 0x3f));
buffer[3] = (byte) (0x80 | (codepoint & 0x3f));
result.append(buffer, 0, 4);
}
}
/**
* Create a text that is truncated to at most MAX_BYTES_RECORDED at a
* character boundary with the last code point incremented by 1.
* The length is assumed to be greater than MAX_BYTES_RECORDED.
* @param text the text to truncate
* @param from the index of the first character
* @return truncated Text value
*/
private static Text truncateUpperBound(final byte[] text, final int from) {
int followingChar = Utf8Utils.findLastCharacter(text, from,
from + MAX_BYTES_RECORDED);
int lastChar = Utf8Utils.findLastCharacter(text, from, followingChar - 1);
Text result = new Text();
result.set(text, from, lastChar - from);
appendCodePoint(result,
Utf8Utils.getCodePoint(text, lastChar, followingChar - lastChar) + 1);
return result;
}
/**
* Create a text that is truncated to at most MAX_BYTES_RECORDED at a
* character boundary.
* The length is assumed to be greater than MAX_BYTES_RECORDED.
* @param text Byte array to truncate
* @param from This is the index of the first character
* @return truncated {@link Text}
*/
private static Text truncateLowerBound(final byte[] text, final int from) {
int lastChar = Utf8Utils.findLastCharacter(text, from,
from + MAX_BYTES_RECORDED);
Text result = new Text();
result.set(text, from, lastChar - from);
return result;
}
}
protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
BinaryColumnStatistics {
private long sum = 0;
BinaryStatisticsImpl() {
}
BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) {
super(stats);
OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics();
if (binStats.hasSum()) {
sum = binStats.getSum();
}
}
@Override
public void reset() {
super.reset();
sum = 0;
}
@Override
public void updateBinary(BytesWritable value) {
sum += value.getLength();
}
@Override
public void updateBinary(byte[] bytes, int offset, int length,
int repetitions) {
sum += (long)length * repetitions;
}
@Override
public void merge(ColumnStatisticsImpl other) {
if (other instanceof BinaryColumnStatistics) {
BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
sum += bin.sum;
} else {
if (isStatsExists() && sum != 0) {
throw new IllegalArgumentException("Incompatible merging of binary column statistics");
}
}
super.merge(other);
}
@Override
public long getSum() {
return sum;
}
@Override
public OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder result = super.serialize();
OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder();
bin.setSum(sum);
result.setBinaryStatistics(bin);
return result;
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (getNumberOfValues() != 0) {
buf.append(" sum: ");
buf.append(sum);
}
return buf.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof BinaryStatisticsImpl)) {
return false;
}
if (!super.equals(o)) {
return false;
}
BinaryStatisticsImpl that = (BinaryStatisticsImpl) o;
if (sum != that.sum) {
return false;
}
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (int) (sum ^ (sum >>> 32));
return result;
}
}
private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl
implements DecimalColumnStatistics {
// These objects are mutable for better performance.
private HiveDecimalWritable minimum = null;
private HiveDecimalWritable maximum = null;
private HiveDecimalWritable sum = new HiveDecimalWritable(0);
DecimalStatisticsImpl() {
}
DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) {
super(stats);
OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
if (dec.hasMaximum()) {
maximum = new HiveDecimalWritable(dec.getMaximum());
}
if (dec.hasMinimum()) {
minimum = new HiveDecimalWritable(dec.getMinimum());
}
if (dec.hasSum()) {
sum = new HiveDecimalWritable(dec.getSum());
} else {
sum = null;
}
}
@Override
public void reset() {
super.reset();
minimum = null;
maximum = null;
sum = new HiveDecimalWritable(0);
}
@Override
public void updateDecimal(HiveDecimalWritable value) {
if (minimum == null) {
minimum = new HiveDecimalWritable(value);
maximum = new HiveDecimalWritable(value);
} else if (minimum.compareTo(value) > 0) {
minimum.set(value);
} else if (maximum.compareTo(value) < 0) {
maximum.set(value);
}
if (sum != null) {
sum.mutateAdd(value);
}
}
@Override
public void updateDecimal64(long value, int scale) {
HiveDecimalWritable dValue = new HiveDecimalWritable();
dValue.setFromLongAndScale(value, scale);
updateDecimal(dValue);
}
@Override
public void merge(ColumnStatisticsImpl other) {
if (other instanceof DecimalStatisticsImpl) {
DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
if (minimum == null) {
minimum = (dec.minimum != null ? new HiveDecimalWritable(dec.minimum) : null);
maximum = (dec.maximum != null ? new HiveDecimalWritable(dec.maximum) : null);
sum = dec.sum;
} else if (dec.minimum != null) {
if (minimum.compareTo(dec.minimum) > 0) {
minimum.set(dec.minimum);
}
if (maximum.compareTo(dec.maximum) < 0) {
maximum.set(dec.maximum);
}
if (sum == null || dec.sum == null) {
sum = null;
} else {
sum.mutateAdd(dec.sum);
}
}
} else {
if (isStatsExists() && minimum != null) {
throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
}
}
super.merge(other);
}
@Override
public OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder result = super.serialize();
OrcProto.DecimalStatistics.Builder dec =
OrcProto.DecimalStatistics.newBuilder();
if (getNumberOfValues() != 0 && minimum != null) {
dec.setMinimum(minimum.toString());
dec.setMaximum(maximum.toString());
}
// Check isSet for overflow.
if (sum != null && sum.isSet()) {
dec.setSum(sum.toString());
}
result.setDecimalStatistics(dec);
return result;
}
@Override
public HiveDecimal getMinimum() {
return minimum == null ? null : minimum.getHiveDecimal();
}
@Override
public HiveDecimal getMaximum() {
return maximum == null ? null : maximum.getHiveDecimal();
}
@Override
public HiveDecimal getSum() {
return sum == null ? null : sum.getHiveDecimal();
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (getNumberOfValues() != 0) {
buf.append(" min: ");
buf.append(minimum);
buf.append(" max: ");
buf.append(maximum);
if (sum != null) {
buf.append(" sum: ");
buf.append(sum);
}
}
return buf.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof DecimalStatisticsImpl)) {
return false;
}
if (!super.equals(o)) {
return false;
}
DecimalStatisticsImpl that = (DecimalStatisticsImpl) o;
if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
return false;
}
if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
return false;
}
if (sum != null ? !sum.equals(that.sum) : that.sum != null) {
return false;
}
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
result = 31 * result + (sum != null ? sum.hashCode() : 0);
return result;
}
}
private static final class Decimal64StatisticsImpl extends ColumnStatisticsImpl
implements DecimalColumnStatistics {
private final int scale;
private long minimum = Long.MAX_VALUE;
private long maximum = Long.MIN_VALUE;
private boolean hasSum = true;
private long sum = 0;
private final HiveDecimalWritable scratch = new HiveDecimalWritable();
Decimal64StatisticsImpl(int scale) {
this.scale = scale;
}
Decimal64StatisticsImpl(int scale, OrcProto.ColumnStatistics stats) {
super(stats);
this.scale = scale;
OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
if (dec.hasMaximum()) {
maximum = new HiveDecimalWritable(dec.getMaximum()).serialize64(scale);
} else {
maximum = Long.MIN_VALUE;
}
if (dec.hasMinimum()) {
minimum = new HiveDecimalWritable(dec.getMinimum()).serialize64(scale);
} else {
minimum = Long.MAX_VALUE;
}
if (dec.hasSum()) {
hasSum = true;
HiveDecimalWritable sumTmp = new HiveDecimalWritable(dec.getSum());
if (sumTmp.getHiveDecimal().integerDigitCount() + scale <=
TypeDescription.MAX_DECIMAL64_PRECISION) {
hasSum = true;
sum = sumTmp.serialize64(scale);
return;
}
}
hasSum = false;
}
@Override
public void reset() {
super.reset();
minimum = Long.MAX_VALUE;
maximum = Long.MIN_VALUE;
hasSum = true;
sum = 0;
}
@Override
public void updateDecimal(HiveDecimalWritable value) {
updateDecimal64(value.serialize64(scale), scale);
}
@Override
public void updateDecimal64(long value, int valueScale) {
// normalize the scale to our desired level
while (valueScale != scale) {
if (valueScale > scale) {
value /= 10;
valueScale -= 1;
} else {
value *= 10;
valueScale += 1;
}
}
if (value < TypeDescription.MIN_DECIMAL64 ||
value > TypeDescription.MAX_DECIMAL64) {
throw new IllegalArgumentException("Out of bounds decimal64 " + value);
}
if (minimum > value) {
minimum = value;
}
if (maximum < value) {
maximum = value;
}
if (hasSum) {
sum += value;
hasSum = sum <= TypeDescription.MAX_DECIMAL64 &&
sum >= TypeDescription.MIN_DECIMAL64;
}
}
@Override
public void merge(ColumnStatisticsImpl other) {
if (other instanceof Decimal64StatisticsImpl) {
Decimal64StatisticsImpl dec = (Decimal64StatisticsImpl) other;
if (getNumberOfValues() == 0) {
minimum = dec.minimum;
maximum = dec.maximum;
sum = dec.sum;
} else {
if (minimum > dec.minimum) {
minimum = dec.minimum;
}
if (maximum < dec.maximum) {
maximum = dec.maximum;
}
if (hasSum && dec.hasSum) {
sum += dec.sum;
hasSum = sum <= TypeDescription.MAX_DECIMAL64 &&
sum >= TypeDescription.MIN_DECIMAL64;
} else {
hasSum = false;
}
}
} else {
if (other.getNumberOfValues() != 0) {
throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
}
}
super.merge(other);
}
@Override
public OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder result = super.serialize();
OrcProto.DecimalStatistics.Builder dec =
OrcProto.DecimalStatistics.newBuilder();
if (getNumberOfValues() != 0) {
scratch.setFromLongAndScale(minimum, scale);
dec.setMinimum(scratch.toString());
scratch.setFromLongAndScale(maximum, scale);
dec.setMaximum(scratch.toString());
}
// Check hasSum for overflow.
if (hasSum) {
scratch.setFromLongAndScale(sum, scale);
dec.setSum(scratch.toString());
}
result.setDecimalStatistics(dec);
return result;
}
@Override
public HiveDecimal getMinimum() {
if (getNumberOfValues() > 0) {
scratch.setFromLongAndScale(minimum, scale);
return scratch.getHiveDecimal();
}
return null;
}
@Override
public HiveDecimal getMaximum() {
if (getNumberOfValues() > 0) {
scratch.setFromLongAndScale(maximum, scale);
return scratch.getHiveDecimal();
}
return null;
}
@Override
public HiveDecimal getSum() {
if (hasSum) {
scratch.setFromLongAndScale(sum, scale);
return scratch.getHiveDecimal();
}
return null;
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (getNumberOfValues() != 0) {
buf.append(" min: ");
buf.append(getMinimum());
buf.append(" max: ");
buf.append(getMaximum());
if (hasSum) {
buf.append(" sum: ");
buf.append(getSum());
}
}
return buf.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof Decimal64StatisticsImpl)) {
return false;
}
if (!super.equals(o)) {
return false;
}
Decimal64StatisticsImpl that = (Decimal64StatisticsImpl) o;
if (minimum != that.minimum ||
maximum != that.maximum ||
hasSum != that.hasSum) {
return false;
}
return !hasSum || (sum == that.sum);
}
@Override
public int hashCode() {
int result = super.hashCode();
boolean hasValues = getNumberOfValues() > 0;
result = 31 * result + (hasValues ? (int) minimum : 0);
result = 31 * result + (hasValues ? (int) maximum : 0);
result = 31 * result + (hasSum ? (int) sum : 0);
return result;
}
}
private static final class DateStatisticsImpl extends ColumnStatisticsImpl
implements DateColumnStatistics {
private Integer minimum = null;
private Integer maximum = null;
DateStatisticsImpl() {
}
DateStatisticsImpl(OrcProto.ColumnStatistics stats,
boolean writerUsedProlepticGregorian,
boolean convertToProlepticGregorian) {
super(stats);
OrcProto.DateStatistics dateStats = stats.getDateStatistics();
// min,max values serialized/deserialized as int (days since epoch)
if (dateStats.hasMaximum()) {
maximum = DateUtils.convertDate(dateStats.getMaximum(),
writerUsedProlepticGregorian, convertToProlepticGregorian);
}
if (dateStats.hasMinimum()) {
minimum = DateUtils.convertDate(dateStats.getMinimum(),
writerUsedProlepticGregorian, convertToProlepticGregorian);
}
}
@Override
public void reset() {
super.reset();
minimum = null;
maximum = null;
}
@Override
public void updateDate(DateWritable value) {
if (minimum == null) {
minimum = value.getDays();
maximum = value.getDays();
} else if (minimum > value.getDays()) {
minimum = value.getDays();
} else if (maximum < value.getDays()) {
maximum = value.getDays();
}
}
@Override
public void updateDate(int value) {
if (minimum == null) {
minimum = value;
maximum = value;
} else if (minimum > value) {
minimum = value;
} else if (maximum < value) {
maximum = value;
}
}
@Override
public void merge(ColumnStatisticsImpl other) {
if (other instanceof DateStatisticsImpl) {
DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
if (minimum == null) {
minimum = dateStats.minimum;
maximum = dateStats.maximum;
} else if (dateStats.minimum != null) {
if (minimum > dateStats.minimum) {
minimum = dateStats.minimum;
}
if (maximum < dateStats.maximum) {
maximum = dateStats.maximum;
}
}
} else {
if (isStatsExists() && minimum != null) {
throw new IllegalArgumentException("Incompatible merging of date column statistics");
}
}
super.merge(other);
}
@Override
public OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder result = super.serialize();
OrcProto.DateStatistics.Builder dateStats =
OrcProto.DateStatistics.newBuilder();
if (getNumberOfValues() != 0 && minimum != null) {
dateStats.setMinimum(minimum);
dateStats.setMaximum(maximum);
}
result.setDateStatistics(dateStats);
return result;
}
private transient final DateWritable minDate = new DateWritable();
private transient final DateWritable maxDate = new DateWritable();
@Override
public Date getMinimum() {
if (minimum == null) {
return null;
}
minDate.set(minimum);
return minDate.get();
}
@Override
public Date getMaximum() {
if (maximum == null) {
return null;
}
maxDate.set(maximum);
return maxDate.get();
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (getNumberOfValues() != 0) {
buf.append(" min: ");
buf.append(getMinimum());
buf.append(" max: ");
buf.append(getMaximum());
}
return buf.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof DateStatisticsImpl)) {
return false;
}
if (!super.equals(o)) {
return false;
}
DateStatisticsImpl that = (DateStatisticsImpl) o;
if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
return false;
}
if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
return false;
}
if (minDate != null ? !minDate.equals(that.minDate) : that.minDate != null) {
return false;
}
if (maxDate != null ? !maxDate.equals(that.maxDate) : that.maxDate != null) {
return false;
}
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
result = 31 * result + (minDate != null ? minDate.hashCode() : 0);
result = 31 * result + (maxDate != null ? maxDate.hashCode() : 0);
return result;
}
}
private static class TimestampStatisticsImpl extends ColumnStatisticsImpl
implements TimestampColumnStatistics {
private Long minimum = null;
private Long maximum = null;
TimestampStatisticsImpl() {
}
TimestampStatisticsImpl(OrcProto.ColumnStatistics stats,
boolean writerUsedProlepticGregorian,
boolean convertToProlepticGregorian) {
super(stats);
OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics();
// min,max values serialized/deserialized as int (milliseconds since epoch)
if (timestampStats.hasMaximum()) {
maximum = DateUtils.convertTime(
SerializationUtils.convertToUtc(TimeZone.getDefault(),
timestampStats.getMaximum()),
writerUsedProlepticGregorian, convertToProlepticGregorian);
}
if (timestampStats.hasMinimum()) {
minimum = DateUtils.convertTime(
SerializationUtils.convertToUtc(TimeZone.getDefault(),
timestampStats.getMinimum()),
writerUsedProlepticGregorian, convertToProlepticGregorian);
}
if (timestampStats.hasMaximumUtc()) {
maximum = DateUtils.convertTime(timestampStats.getMaximumUtc(),
writerUsedProlepticGregorian, convertToProlepticGregorian);
}
if (timestampStats.hasMinimumUtc()) {
minimum = DateUtils.convertTime(timestampStats.getMinimumUtc(),
writerUsedProlepticGregorian, convertToProlepticGregorian);
}
}
@Override
public void reset() {
super.reset();
minimum = null;
maximum = null;
}
@Override
public void updateTimestamp(Timestamp value) {
long millis = SerializationUtils.convertToUtc(TimeZone.getDefault(),
value.getTime());
updateTimestamp(millis);
}
@Override
public void updateTimestamp(long value) {
if (minimum == null) {
minimum = value;
maximum = value;
} else if (minimum > value) {
minimum = value;
} else if (maximum < value) {
maximum = value;
}
}
@Override
public void merge(ColumnStatisticsImpl other) {
if (other instanceof TimestampStatisticsImpl) {
TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
if (minimum == null) {
minimum = timestampStats.minimum;
maximum = timestampStats.maximum;
} else if (timestampStats.minimum != null) {
if (minimum > timestampStats.minimum) {
minimum = timestampStats.minimum;
}
if (maximum < timestampStats.maximum) {
maximum = timestampStats.maximum;
}
}
} else {
if (isStatsExists() && minimum != null) {
throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
}
}
super.merge(other);
}
@Override
public OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder result = super.serialize();
OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics
.newBuilder();
if (getNumberOfValues() != 0 && minimum != null) {
timestampStats.setMinimumUtc(minimum);
timestampStats.setMaximumUtc(maximum);
}
result.setTimestampStatistics(timestampStats);
return result;
}
@Override
public Timestamp getMinimum() {
return minimum == null ? null :
new Timestamp(SerializationUtils.convertFromUtc(TimeZone.getDefault(),
minimum));
}
@Override
public Timestamp getMaximum() {
return maximum == null ? null :
new Timestamp(SerializationUtils.convertFromUtc(TimeZone.getDefault(),
maximum));
}
@Override
public Timestamp getMinimumUTC() {
return minimum == null ? null : new Timestamp(minimum);
}
@Override
public Timestamp getMaximumUTC() {
return maximum == null ? null : new Timestamp(maximum);
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (minimum != null || maximum != null) {
buf.append(" min: ");
buf.append(getMinimum());
buf.append(" max: ");
buf.append(getMaximum());
}
return buf.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof TimestampStatisticsImpl)) {
return false;
}
if (!super.equals(o)) {
return false;
}
TimestampStatisticsImpl that = (TimestampStatisticsImpl) o;
if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
return false;
}
if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
return false;
}
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
return result;
}
}
private static final class TimestampInstantStatisticsImpl extends TimestampStatisticsImpl {
TimestampInstantStatisticsImpl() {
}
TimestampInstantStatisticsImpl(OrcProto.ColumnStatistics stats,
boolean writerUsedProlepticGregorian,
boolean convertToProlepticGregorian) {
super(stats, writerUsedProlepticGregorian, convertToProlepticGregorian);
}
@Override
public void updateTimestamp(Timestamp value) {
updateTimestamp(value.getTime());
}
@Override
public Timestamp getMinimum() {
return getMinimumUTC();
}
@Override
public Timestamp getMaximum() {
return getMaximumUTC();
}
}
protected long count = 0;
private boolean hasNull = false;
private long bytesOnDisk = 0;
ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
if (stats.hasNumberOfValues()) {
count = stats.getNumberOfValues();
}
bytesOnDisk = stats.hasBytesOnDisk() ? stats.getBytesOnDisk() : 0;
if (stats.hasHasNull()) {
hasNull = stats.getHasNull();
} else {
hasNull = true;
}
}
ColumnStatisticsImpl() {
}
public void increment() {
count += 1;
}
public void increment(int count) {
this.count += count;
}
public void updateByteCount(long size) {
this.bytesOnDisk += size;
}
public void setNull() {
hasNull = true;
}
/**
* Update the collection length for Map and List type.
* @param value length of collection
*/
public void updateCollectionLength(final long value) {
throw new UnsupportedOperationException(
"Can't update collection count");
}
public void updateBoolean(boolean value, int repetitions) {
throw new UnsupportedOperationException("Can't update boolean");
}
public void updateInteger(long value, int repetitions) {
throw new UnsupportedOperationException("Can't update integer");
}
public void updateDouble(double value) {
throw new UnsupportedOperationException("Can't update double");
}
public void updateString(Text value) {
throw new UnsupportedOperationException("Can't update string");
}
public void updateString(byte[] bytes, int offset, int length,
int repetitions) {
throw new UnsupportedOperationException("Can't update string");
}
public void updateBinary(BytesWritable value) {
throw new UnsupportedOperationException("Can't update binary");
}
public void updateBinary(byte[] bytes, int offset, int length,
int repetitions) {
throw new UnsupportedOperationException("Can't update string");
}
public void updateDecimal(HiveDecimalWritable value) {
throw new UnsupportedOperationException("Can't update decimal");
}
public void updateDecimal64(long value, int scale) {
throw new UnsupportedOperationException("Can't update decimal");
}
public void updateDate(DateWritable value) {
throw new UnsupportedOperationException("Can't update date");
}
public void updateDate(int value) {
throw new UnsupportedOperationException("Can't update date");
}
public void updateTimestamp(Timestamp value) {
throw new UnsupportedOperationException("Can't update timestamp");
}
public void updateTimestamp(long value) {
throw new UnsupportedOperationException("Can't update timestamp");
}
public boolean isStatsExists() {
return (count > 0 || hasNull == true);
}
public void merge(ColumnStatisticsImpl stats) {
count += stats.count;
hasNull |= stats.hasNull;
bytesOnDisk += stats.bytesOnDisk;
}
public void reset() {
count = 0;
bytesOnDisk = 0;
hasNull = false;
}
@Override
public long getNumberOfValues() {
return count;
}
@Override
public boolean hasNull() {
return hasNull;
}
/**
* Get the number of bytes for this column.
*
* @return the number of bytes
*/
@Override
public long getBytesOnDisk() {
return bytesOnDisk;
}
@Override
public String toString() {
return "count: " + count + " hasNull: " + hasNull +
(bytesOnDisk != 0 ? " bytesOnDisk: " + bytesOnDisk : "");
}
public OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.ColumnStatistics.Builder builder =
OrcProto.ColumnStatistics.newBuilder();
builder.setNumberOfValues(count);
builder.setHasNull(hasNull);
if (bytesOnDisk != 0) {
builder.setBytesOnDisk(bytesOnDisk);
}
return builder;
}
public static ColumnStatisticsImpl create(TypeDescription schema) {
switch (schema.getCategory()) {
case BOOLEAN:
return new BooleanStatisticsImpl();
case BYTE:
case SHORT:
case INT:
case LONG:
return new IntegerStatisticsImpl();
case LIST:
case MAP:
return new CollectionColumnStatisticsImpl();
case FLOAT:
case DOUBLE:
return new DoubleStatisticsImpl();
case STRING:
case CHAR:
case VARCHAR:
return new StringStatisticsImpl();
case DECIMAL:
if (schema.getPrecision() <= TypeDescription.MAX_DECIMAL64_PRECISION) {
return new Decimal64StatisticsImpl(schema.getScale());
} else {
return new DecimalStatisticsImpl();
}
case DATE:
return new DateStatisticsImpl();
case TIMESTAMP:
return new TimestampStatisticsImpl();
case TIMESTAMP_INSTANT:
return new TimestampInstantStatisticsImpl();
case BINARY:
return new BinaryStatisticsImpl();
default:
return new ColumnStatisticsImpl();
}
}
public static ColumnStatisticsImpl deserialize(TypeDescription schema,
OrcProto.ColumnStatistics stats) {
return deserialize(schema, stats, false, false);
}
public static ColumnStatisticsImpl deserialize(TypeDescription schema,
OrcProto.ColumnStatistics stats,
ReaderImpl reader) {
return deserialize(schema, stats, reader.writerUsedProlepticGregorian(),
reader.options.getConvertToProlepticGregorian());
}
public static ColumnStatisticsImpl deserialize(TypeDescription schema,
OrcProto.ColumnStatistics stats,
boolean writerUsedProlepticGregorian,
boolean convertToProlepticGregorian) {
if (stats.hasBucketStatistics()) {
return new BooleanStatisticsImpl(stats);
} else if (stats.hasIntStatistics()) {
return new IntegerStatisticsImpl(stats);
} else if (stats.hasCollectionStatistics()) {
return new CollectionColumnStatisticsImpl(stats);
} else if (stats.hasDoubleStatistics()) {
return new DoubleStatisticsImpl(stats);
} else if (stats.hasStringStatistics()) {
return new StringStatisticsImpl(stats);
} else if (stats.hasDecimalStatistics()) {
if (schema != null &&
schema.getPrecision() <= TypeDescription.MAX_DECIMAL64_PRECISION) {
return new Decimal64StatisticsImpl(schema.getScale(), stats);
} else {
return new DecimalStatisticsImpl(stats);
}
} else if (stats.hasDateStatistics()) {
return new DateStatisticsImpl(stats, writerUsedProlepticGregorian,
convertToProlepticGregorian);
} else if (stats.hasTimestampStatistics()) {
return schema == null ||
schema.getCategory() == TypeDescription.Category.TIMESTAMP ?
new TimestampStatisticsImpl(stats,
writerUsedProlepticGregorian, convertToProlepticGregorian) :
new TimestampInstantStatisticsImpl(stats,
writerUsedProlepticGregorian, convertToProlepticGregorian);
} else if(stats.hasBinaryStatistics()) {
return new BinaryStatisticsImpl(stats);
} else {
return new ColumnStatisticsImpl(stats);
}
}
}