blob: 5cc64e146be740cf9cd23855ae866544601515c5 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.tsfile.file.metadata.statistics;
import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
import org.apache.iotdb.tsfile.encoding.encoder.PlainEncoder;
import org.apache.iotdb.tsfile.encoding.encoder.SDTEncoder;
import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
import org.apache.iotdb.tsfile.read.common.ValuePoint;
import org.apache.iotdb.tsfile.utils.PublicBAOS;
import org.eclipse.collections.impl.list.mutable.primitive.DoubleArrayList;
import org.eclipse.collections.impl.list.mutable.primitive.IntArrayList;
import java.util.ArrayList;
import java.util.List;
/**
 * Collects the values of a series and, on {@link #learn()}, runs them through an {@link
 * SDTEncoder} whose compression deviation is derived from the sample standard deviation of the
 * inserted values. The points selected by the SDT encoder (except the very first point) are
 * written as (position, value) pairs into {@link #idxOut} and {@link #valueOut}.
 *
 * <p>Usage order: call {@code insert(...)} for every value, then {@link #learn()}. After {@code
 * learn()} the raw values are released, so further {@code insert(...)} calls are not supported.
 */
public class ValueIndex {

  /** Multiplier used to derive {@link #errorBound}; read from the TsFile configuration. */
  int errorParam = TSFileDescriptor.getInstance().getConfig().getErrorParam();

  /** Raw inserted values in insertion order; set to {@code null} once {@link #learn()} has run. */
  private DoubleArrayList values = new DoubleArrayList();

  public SDTEncoder sdtEncoder = new SDTEncoder();

  /** Set by {@link #learn()} to {@code 2 * stdDev * errorParam}. */
  public double errorBound = 0;

  public PlainEncoder idxEncoder = new PlainEncoder(TSDataType.INT32, 0);
  public PlainEncoder valueEncoder = new PlainEncoder(TSDataType.DOUBLE, 0);

  /** Receives the encoded 1-based positions of the model points. */
  public PublicBAOS idxOut = new PublicBAOS();

  /** Receives the encoded values of the model points. */
  public PublicBAOS valueOut = new PublicBAOS();

  public IntArrayList modelPointIdx_list = new IntArrayList();
  public DoubleArrayList modelPointVal_list = new DoubleArrayList();
  public List<ValuePoint> sortedModelPoints =
      new ArrayList<>(); // sorted by value in ascending order

  // this is necessary, otherwise serialized twice by timeseriesMetadata and chunkMetadata
  // causing learn() executed more than once!!
  private boolean isLearned = false;

  public int modelPointCount = 0; // except the first and last points

  private double stdDev = 0; // sample standard deviation of the inserted values

  // Running aggregates maintained by insert(...) and consumed by getStdDev().
  private long count = 0;
  private double sumX2 = 0.0; // sum of squared values
  private double sumX1 = 0.0; // sum of values

  /**
   * Records an {@code int} value (widened to {@code double}).
   *
   * @param value the value to record
   */
  public void insert(int value) {
    insert((double) value);
  }

  /**
   * Records a {@code long} value (converted to {@code double}).
   *
   * @param value the value to record
   */
  public void insert(long value) {
    insert((double) value);
  }

  /**
   * Records a {@code float} value (widened to {@code double}).
   *
   * @param value the value to record
   */
  public void insert(float value) {
    insert((double) value);
  }

  /**
   * Records a value and updates the running count, sum and sum of squares.
   *
   * <p>Must not be called after {@link #learn()}: the backing list has been released by then and
   * the call fails with a {@link NullPointerException}.
   *
   * @param value the value to record
   */
  public void insert(double value) {
    values.add(value);
    count++;
    sumX1 += value;
    sumX2 += value * value;
  }

  /**
   * Returns the sample standard deviation (Bessel-corrected, i.e. divided by {@code n - 1}) of
   * all values inserted so far.
   *
   * @return the sample standard deviation, or {@code 0.0} when fewer than two values have been
   *     inserted (where the sample standard deviation is undefined and the unguarded formula
   *     would evaluate to NaN)
   */
  public double getStdDev() {
    if (count < 2) {
      // 0/0 in either the mean or the (n - 1) correction would yield NaN.
      return 0.0;
    }
    double mean = sumX1 / count;
    // E[x^2] - E[x]^2 can round to a tiny negative number for (near-)constant data;
    // clamp so that the square root below does not turn it into NaN.
    double populationVariance = Math.max(0.0, sumX2 / count - Math.pow(mean, 2));
    double std = Math.sqrt(populationVariance);
    return Math.sqrt(Math.pow(std, 2) * count / (count - 1));
  }

  /** Derives the error bound from the current statistics and configures the SDT encoder. */
  private void initForLearn() {
    this.stdDev = getStdDev();
    this.errorBound = 2 * stdDev * errorParam;
    this.sdtEncoder.setCompDeviation(errorBound / 2.0); // stdDev * errorParam
  }

  /**
   * Feeds every inserted value, with its 1-based position, to the SDT encoder and stores each
   * point the encoder reports (except position 1) into {@link #idxOut} / {@link #valueOut},
   * counting them in {@link #modelPointCount}.
   *
   * <p>Only the first invocation does any work; later invocations return immediately. The raw
   * values are released at the end.
   */
  public void learn() {
    if (isLearned) {
      // this is necessary, otherwise serialized twice by timeseriesMetadata and chunkMetadata
      // causing learn() executed more than once!!
      return;
    }
    isLearned = true;
    initForLearn(); // set self-adapting CompDeviation for sdtEncoder

    boolean hasDataToFlush = false;
    final int size = values.size();
    for (int i = 0; i < size; i++) {
      int pos = i + 1; // positions start from 1
      // encodeDouble must be invoked for every point (including the first) so the encoder
      // sees the whole series; hence it is evaluated before the position check.
      if (sdtEncoder.encodeDouble(pos, values.get(i)) && pos > 1) {
        // the first point value is stored as FirstValue in statistics, so here no need store
        // the first point
        // the last point won't be checked by the if SDT encode logic
        modelPointCount++;
        idxEncoder.encode((int) sdtEncoder.getTime(), idxOut);
        valueEncoder.encode(sdtEncoder.getDoubleValue(), valueOut);
        hasDataToFlush = true;
      }
    }

    if (hasDataToFlush) {
      // otherwise no need flush, because GorillaV2 encoding will output NaN even if
      // hasDataToFlush=false
      idxEncoder.flush(idxOut); // necessary
      valueEncoder.flush(valueOut); // necessary
    }
    values = null; // raw values are not needed any more
  }
}