blob: 7f7dd1bfd6dedaf191a329ff1a901993bd1c9946 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.db.queryengine.execution.aggregation;
import org.apache.tsfile.block.column.Column;
import org.apache.tsfile.block.column.ColumnBuilder;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.file.metadata.statistics.Statistics;
import org.apache.tsfile.utils.Binary;
import org.apache.tsfile.utils.BitMap;
import org.apache.tsfile.utils.BytesUtils;
import org.apache.tsfile.write.UnSupportedDataTypeException;
import java.util.Arrays;
import static com.google.common.base.Preconditions.checkArgument;
/**
 * Accumulator for the variance family of aggregations (population / sample variance and standard
 * deviation) over INT32, INT64, FLOAT and DOUBLE series.
 *
 * <p>The running state is the triple {@code (count, mean, m2)}, where {@code m2} is the sum of
 * squared deviations from the mean. Single values are folded in with Welford's online update, and
 * partial states are merged (or removed again) with the matching pairwise formula, so the result
 * never needs the raw values.
 *
 * <p>The intermediate result is a single binary value that concatenates the serialized {@code
 * count} (long), {@code mean} (double) and {@code m2} (double), in that order.
 */
public class VarianceAccumulator implements Accumulator {

  /** Selects which statistic {@link #outputFinal(ColumnBuilder)} derives from the state. */
  public enum VarianceType {
    STDDEV_POP,
    STDDEV_SAMP,
    VAR_POP,
    VAR_SAMP,
  }

  private final TSDataType seriesDataType;
  private final VarianceType varianceType;

  /** Number of non-null values accumulated so far. */
  private long count;

  /** Running mean of the accumulated values. */
  private double mean;

  /** Running sum of squared deviations from {@link #mean}. */
  private double m2;

  /**
   * @param seriesDataType data type of the input value column; selects the typed reader used by
   *     {@link #addInput(Column[], BitMap)}
   * @param varianceType statistic to emit from {@link #outputFinal(ColumnBuilder)}
   */
  public VarianceAccumulator(TSDataType seriesDataType, VarianceType varianceType) {
    this.seriesDataType = seriesDataType;
    this.varianceType = varianceType;
  }

  /**
   * Folds every selected, non-null value of {@code columns[1]} into the running state.
   *
   * @param columns input columns; the row count is taken from {@code columns[0]} and the values
   *     from {@code columns[1]}
   * @param bitMap optional row filter; when non-null, rows that are not marked are skipped
   * @throws UnSupportedDataTypeException if the series data type is not INT32, INT64, FLOAT or
   *     DOUBLE
   */
  @Override
  public void addInput(Column[] columns, BitMap bitMap) {
    switch (seriesDataType) {
      case INT32:
        addIntInput(columns, bitMap);
        return;
      case INT64:
        addLongInput(columns, bitMap);
        return;
      case FLOAT:
        addFloatInput(columns, bitMap);
        return;
      case DOUBLE:
        addDoubleInput(columns, bitMap);
        return;
      case TEXT:
      case BOOLEAN:
      default:
        throw new UnSupportedDataTypeException(
            String.format("Unsupported data type in aggregation variance : %s", seriesDataType));
    }
  }

  /**
   * Merges a serialized partial state into this accumulator.
   *
   * <p>A null partial, or one whose count is zero, contributes nothing and is ignored.
   *
   * @param partialResult exactly one column whose first row holds the serialized state
   */
  @Override
  public void addIntermediate(Column[] partialResult) {
    checkArgument(partialResult.length == 1, "partialResult of variance should be 1");
    if (partialResult[0].isNull(0)) {
      return;
    }
    byte[] bytes = partialResult[0].getBinary(0).getValues();
    // NOTE(review): bytesToLong's second argument is presumably a length (read the leading
    // Long.BYTES bytes) while bytesToDouble's is an offset - confirm against BytesUtils.
    long intermediateCount = BytesUtils.bytesToLong(bytes, Long.BYTES);
    double intermediateMean = BytesUtils.bytesToDouble(bytes, Long.BYTES);
    double intermediateM2 = BytesUtils.bytesToDouble(bytes, (Long.BYTES + Double.BYTES));

    // An empty partial carries no information; merging it into an empty state would otherwise
    // divide by a zero newCount and poison mean/m2 with NaN.
    if (intermediateCount == 0) {
      return;
    }

    long newCount = count + intermediateCount;
    double newMean = ((intermediateCount * intermediateMean) + (count * mean)) / newCount;
    double delta = intermediateMean - mean;
    m2 = m2 + intermediateM2 + delta * delta * intermediateCount * count / newCount;
    count = newCount;
    mean = newMean;
  }

  /**
   * Removes a previously merged partial state from this accumulator (the inverse of {@link
   * #addIntermediate(Column[])}).
   *
   * <p>A null partial, or one whose count is zero, is ignored. If the removal leaves no values,
   * the accumulator returns to its empty state instead of dividing by zero.
   *
   * @param input exactly one column whose first row holds the serialized state to remove
   */
  @Override
  public void removeIntermediate(Column[] input) {
    checkArgument(input.length == 1, "Input of variance should be 1");
    if (input[0].isNull(0)) {
      return;
    }
    // Deserialize
    byte[] bytes = input[0].getBinary(0).getValues();
    long intermediateCount = BytesUtils.bytesToLong(bytes, Long.BYTES);
    double intermediateMean = BytesUtils.bytesToDouble(bytes, Long.BYTES);
    double intermediateM2 = BytesUtils.bytesToDouble(bytes, (Long.BYTES + Double.BYTES));

    if (intermediateCount == 0) {
      // Nothing to remove.
      return;
    }

    // Remove from state
    long newCount = count - intermediateCount;
    if (newCount <= 0) {
      // Everything has been removed. Dividing by newCount here would leave mean and m2 as NaN,
      // which would then contaminate every value added afterwards.
      reset();
      return;
    }
    double newMean = ((count * mean) - (intermediateCount * intermediateMean)) / newCount;
    // delta is measured against the combined mean, hence the count / newCount scaling below.
    double delta = intermediateMean - mean;
    double newM2 = m2 - intermediateM2 - delta * delta * intermediateCount * count / newCount;
    // m2 is a sum of squares, so a negative value can only come from rounding in the subtraction;
    // clamp it so the standard deviation never becomes sqrt(negative). NaN still propagates.
    m2 = Math.max(0.0, newM2);
    count = newCount;
    mean = newMean;
  }

  /**
   * Not supported: variance cannot be derived from pre-computed statistics.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void addStatistics(Statistics statistics) {
    throw new UnsupportedOperationException(getClass().getName());
  }

  /**
   * Replaces the state with one derived from an already computed final value.
   *
   * <p>A null result leaves the accumulator empty. Otherwise the state becomes {@code count = 1},
   * {@code mean = value}, {@code m2 = value * value}.
   *
   * <p>NOTE(review): with this state {@link #outputFinal(ColumnBuilder)} reproduces {@code value}
   * only for STDDEV_POP (for non-negative values); VAR_POP yields {@code value * value} and the
   * SAMP variants yield null because {@code count < 2}. Confirm whether a round trip is expected
   * for the other types.
   *
   * @param finalResult column whose first row holds the final double value, or null
   */
  @Override
  public void setFinal(Column finalResult) {
    reset();
    if (finalResult.isNull(0)) {
      return;
    }
    count = 1;
    double value = finalResult.getDouble(0);
    mean = value;
    m2 = value * value;
  }

  /**
   * Writes the serialized state, or null when no value has been accumulated.
   *
   * @param columnBuilders exactly one builder receiving the binary intermediate result
   */
  @Override
  public void outputIntermediate(ColumnBuilder[] columnBuilders) {
    checkArgument(columnBuilders.length == 1, "partialResult of variance should be 1");
    if (count == 0) {
      columnBuilders[0].appendNull();
    } else {
      byte[] bytes = serialize();
      columnBuilders[0].writeBinary(new Binary(bytes));
    }
  }

  /** Serializes the state as the concatenation of count, mean and m2, in that order. */
  private byte[] serialize() {
    byte[] countBytes = BytesUtils.longToBytes(count);
    byte[] meanBytes = BytesUtils.doubleToBytes(mean);
    byte[] m2Bytes = BytesUtils.doubleToBytes(m2);
    return BytesUtils.concatByteArrayList(Arrays.asList(countBytes, meanBytes, m2Bytes));
  }

  /**
   * Writes the statistic selected by the variance type.
   *
   * <p>Population variants divide {@code m2} by {@code count} and emit null when there are no
   * values; sample variants divide by {@code count - 1} and emit null when there are fewer than
   * two values. The STDDEV variants take the square root of the corresponding variance.
   *
   * @param columnBuilder builder receiving the double result or null
   */
  @Override
  public void outputFinal(ColumnBuilder columnBuilder) {
    switch (varianceType) {
      case STDDEV_POP:
        if (count == 0) {
          columnBuilder.appendNull();
        } else {
          columnBuilder.writeDouble(Math.sqrt(m2 / count));
        }
        break;
      case STDDEV_SAMP:
        if (count < 2) {
          columnBuilder.appendNull();
        } else {
          columnBuilder.writeDouble(Math.sqrt(m2 / (count - 1)));
        }
        break;
      case VAR_POP:
        if (count == 0) {
          columnBuilder.appendNull();
        } else {
          columnBuilder.writeDouble(m2 / count);
        }
        break;
      case VAR_SAMP:
        if (count < 2) {
          columnBuilder.appendNull();
        } else {
          columnBuilder.writeDouble(m2 / (count - 1));
        }
        break;
      default:
        throw new EnumConstantNotPresentException(VarianceType.class, varianceType.name());
    }
  }

  /** Returns the accumulator to its empty state. */
  @Override
  public void reset() {
    count = 0;
    mean = 0.0;
    m2 = 0.0;
  }

  /** Always {@code false}: this accumulator never reports an early final result. */
  @Override
  public boolean hasFinalResult() {
    return false;
  }

  /** The intermediate result is a single TEXT (binary) column holding the serialized state. */
  @Override
  public TSDataType[] getIntermediateType() {
    return new TSDataType[] {TSDataType.TEXT};
  }

  /** The final result is always a DOUBLE. */
  @Override
  public TSDataType getFinalType() {
    return TSDataType.DOUBLE;
  }

  /**
   * Folds one value into the running state using Welford's online update.
   *
   * <p>The second factor of the {@code m2} update intentionally uses the already updated mean.
   */
  private void update(double value) {
    count++;
    double delta = value - mean;
    mean += delta / count;
    m2 += delta * (value - mean);
  }

  /** Accumulates the selected, non-null INT32 values of {@code columns[1]}. */
  private void addIntInput(Column[] columns, BitMap bitmap) {
    int size = columns[0].getPositionCount();
    for (int i = 0; i < size; i++) {
      if (bitmap != null && !bitmap.isMarked(i)) {
        continue;
      }
      if (!columns[1].isNull(i)) {
        update(columns[1].getInt(i));
      }
    }
  }

  /** Accumulates the selected, non-null INT64 values of {@code columns[1]}. */
  private void addLongInput(Column[] columns, BitMap bitmap) {
    int size = columns[0].getPositionCount();
    for (int i = 0; i < size; i++) {
      if (bitmap != null && !bitmap.isMarked(i)) {
        continue;
      }
      if (!columns[1].isNull(i)) {
        update(columns[1].getLong(i));
      }
    }
  }

  /** Accumulates the selected, non-null FLOAT values of {@code columns[1]}. */
  private void addFloatInput(Column[] columns, BitMap bitmap) {
    int size = columns[0].getPositionCount();
    for (int i = 0; i < size; i++) {
      if (bitmap != null && !bitmap.isMarked(i)) {
        continue;
      }
      if (!columns[1].isNull(i)) {
        update(columns[1].getFloat(i));
      }
    }
  }

  /** Accumulates the selected, non-null DOUBLE values of {@code columns[1]}. */
  private void addDoubleInput(Column[] columns, BitMap bitmap) {
    int size = columns[0].getPositionCount();
    for (int i = 0; i < size; i++) {
      if (bitmap != null && !bitmap.isMarked(i)) {
        continue;
      }
      if (!columns[1].isNull(i)) {
        update(columns[1].getDouble(i));
      }
    }
  }
}