/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;

import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;

/**
 * Generated from template VectorUDAFComputeBitVector.txt. The <ClassName> and
 * <InputColumnVectorType> placeholders and the #IF COMPLETE / #IF MERGING
 * blocks are substituted at build time, producing one concrete aggregate
 * class per supported input column vector type and evaluator mode.
 */
@Description(name = "compute_bit_vector_hll",
    value = "_FUNC_(x) - Computes bit vector for NDV computation")
public class <ClassName> extends VectorAggregateExpression {

  public <ClassName>() {
    super();
  }

  public <ClassName>(VectorAggregationDesc vecAggrDesc) {
    super(vecAggrDesc);
  }

  @Override
  public AggregationBuffer getNewAggregationBuffer() throws HiveException {
    return new Aggregation();
  }
  @Override
  public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) throws HiveException {
    inputExpression.evaluate(batch);

#IF COMPLETE
    <InputColumnVectorType> inputColumn = (<InputColumnVectorType>) batch.cols[this.inputExpression.getOutputColumnNum()];
#ENDIF COMPLETE
#IF MERGING
    BytesColumnVector inputColumn = (BytesColumnVector) batch.cols[this.inputExpression.getOutputColumnNum()];
#ENDIF MERGING

    int batchSize = batch.size;
    if (batchSize == 0) {
      return;
    }

    Aggregation myagg = (Aggregation) agg;
#IF COMPLETE
    myagg.prepare();
    if (inputColumn.noNulls) {
      if (inputColumn.isRepeating) {
        // Every row carries the same value; adding it once is enough for NDV.
        myagg.estimator.addToEstimator(inputColumn.vector[0]);
      } else {
        if (batch.selectedInUse) {
          for (int i = 0; i < batchSize; i++) {
            int s = batch.selected[i];
            myagg.estimator.addToEstimator(inputColumn.vector[s]);
          }
        } else {
          for (int i = 0; i < batchSize; i++) {
            myagg.estimator.addToEstimator(inputColumn.vector[i]);
          }
        }
      }
    } else {
      if (inputColumn.isRepeating) {
        if (!inputColumn.isNull[0]) {
          myagg.estimator.addToEstimator(inputColumn.vector[0]);
        }
      } else {
        if (batch.selectedInUse) {
          for (int i = 0; i < batchSize; i++) {
            int s = batch.selected[i];
            if (!inputColumn.isNull[s]) {
              myagg.estimator.addToEstimator(inputColumn.vector[s]);
            }
          }
        } else {
          for (int i = 0; i < batchSize; i++) {
            if (!inputColumn.isNull[i]) {
              myagg.estimator.addToEstimator(inputColumn.vector[i]);
            }
          }
        }
      }
    }
#ENDIF COMPLETE
#IF MERGING
    if (inputColumn.isRepeating) {
      if (!inputColumn.isNull[0] && inputColumn.length[0] > 0) {
        myagg.prepare();
        HyperLogLog mergingHLL = HyperLogLogUtils.deserializeHLL(
            inputColumn.vector[0], inputColumn.start[0], inputColumn.length[0]);
        myagg.estimator.mergeEstimators(mergingHLL);
      }
    } else {
      for (int i = 0; i < batchSize; i++) {
        int s = i;
        if (batch.selectedInUse) {
          s = batch.selected[i];
        }
        if (!inputColumn.isNull[s] && inputColumn.length[s] > 0) {
          myagg.prepare();
          HyperLogLog mergingHLL = HyperLogLogUtils.deserializeHLL(
              inputColumn.vector[s], inputColumn.start[s], inputColumn.length[s]);
          myagg.estimator.mergeEstimators(mergingHLL);
        }
      }
    }
#ENDIF MERGING
  }
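
  // Looks up the aggregation buffer for one batch row and makes sure its
  // estimator is initialized before values are added or sketches merged.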
  private Aggregation getAggregation(VectorAggregationBufferRow[] sets, int rowid, int bufferIndex) {
    VectorAggregationBufferRow bufferRow = sets[rowid];
    Aggregation myagg = (Aggregation) bufferRow.getAggregationBuffer(bufferIndex);
    myagg.prepare();
    return myagg;
  }
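
  // Grouped variant of aggregateInput: each row of the batch may belong to a
  // different aggregation key, so values are routed to per-row buffers from
  // aggregationBufferSets instead of a single shared buffer.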
  @Override
  public void aggregateInputSelection(VectorAggregationBufferRow[] aggregationBufferSets, int aggregateIndex,
      VectorizedRowBatch batch) throws HiveException {
    inputExpression.evaluate(batch);

#IF COMPLETE
    <InputColumnVectorType> inputColumn = (<InputColumnVectorType>) batch.cols[this.inputExpression.getOutputColumnNum()];
#ENDIF COMPLETE
#IF MERGING
    BytesColumnVector inputColumn = (BytesColumnVector) batch.cols[this.inputExpression.getOutputColumnNum()];
#ENDIF MERGING

    int batchSize = batch.size;
    if (batchSize == 0) {
      return;
    }
#IF COMPLETE
    if (inputColumn.noNulls) {
      if (inputColumn.isRepeating) {
        for (int i = 0; i < batchSize; i++) {
          Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
          myagg.estimator.addToEstimator(inputColumn.vector[0]);
        }
      } else {
        if (batch.selectedInUse) {
          for (int i = 0; i < batchSize; i++) {
            int s = batch.selected[i];
            Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
            myagg.estimator.addToEstimator(inputColumn.vector[s]);
          }
        } else {
          for (int i = 0; i < batchSize; i++) {
            Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
            myagg.estimator.addToEstimator(inputColumn.vector[i]);
          }
        }
      }
    } else {
      if (inputColumn.isRepeating) {
        if (!inputColumn.isNull[0]) {
          for (int i = 0; i < batchSize; i++) {
            Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
            myagg.estimator.addToEstimator(inputColumn.vector[0]);
          }
        }
      } else {
        if (batch.selectedInUse) {
          for (int i = 0; i < batchSize; i++) {
            int s = batch.selected[i];
            if (!inputColumn.isNull[s]) {
              Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
              myagg.estimator.addToEstimator(inputColumn.vector[s]);
            }
          }
        } else {
          for (int i = 0; i < batchSize; i++) {
            if (!inputColumn.isNull[i]) {
              Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
              myagg.estimator.addToEstimator(inputColumn.vector[i]);
            }
          }
        }
      }
    }
#ENDIF COMPLETE
#IF MERGING
    if (inputColumn.isRepeating) {
      if (!inputColumn.isNull[0] && inputColumn.length[0] > 0) {
        for (int i = 0; i < batchSize; i++) {
          Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
          HyperLogLog mergingHLL = HyperLogLogUtils.deserializeHLL(
              inputColumn.vector[0], inputColumn.start[0], inputColumn.length[0]);
          myagg.estimator.mergeEstimators(mergingHLL);
        }
      }
    } else {
      for (int i = 0; i < batchSize; i++) {
        int s = i;
        if (batch.selectedInUse) {
          s = batch.selected[i];
        }
        if (!inputColumn.isNull[s] && inputColumn.length[s] > 0) {
          Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
          HyperLogLog mergingHLL = HyperLogLogUtils.deserializeHLL(
              inputColumn.vector[s], inputColumn.start[s], inputColumn.length[s]);
          myagg.estimator.mergeEstimators(mergingHLL);
        }
      }
    }
#ENDIF MERGING
  }
  @Override
  public void reset(AggregationBuffer agg) throws HiveException {
    agg.reset();
  }

  @Override
  public long getAggregationBufferFixedSize() {
    return Aggregation.FIXED_SIZE;
  }
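
  // Used to decide whether this generated class can implement a given
  // aggregation: COMPLETE variants accept the raw input type in
  // PARTIAL1/COMPLETE mode, MERGING variants accept serialized sketches
  // (BYTES) in PARTIAL2/FINAL mode; the output is always BYTES.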
  @Override
  public boolean matches(String name, ColumnVector.Type inputColVectorType, ColumnVector.Type outputColVectorType,
      GenericUDAFEvaluator.Mode mode) {
    return name.equals("compute_bit_vector_hll") &&
        outputColVectorType == ColumnVector.Type.BYTES &&
#IF MERGING
        inputColVectorType == ColumnVector.Type.BYTES &&
        (mode == GenericUDAFEvaluator.Mode.PARTIAL2 || mode == GenericUDAFEvaluator.Mode.FINAL);
#ENDIF MERGING
#IF COMPLETE
        inputColVectorType == ColumnVector.Type.<UpperCaseColumnVectorType> &&
        (mode == GenericUDAFEvaluator.Mode.PARTIAL1 || mode == GenericUDAFEvaluator.Mode.COMPLETE);
#ENDIF COMPLETE
  }
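
  // Writes the final serialized sketch into the output column for one row; a
  // buffer that never received a value produces SQL NULL.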
  @Override
  public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int columnNum, AggregationBuffer agg)
      throws HiveException {
    Aggregation myagg = (Aggregation) agg;
    BytesColumnVector outputCol = (BytesColumnVector) batch.cols[columnNum];
    if (myagg.estimator == null) {
      outputCol.isNull[batchIndex] = true;
      outputCol.noNulls = false;
    } else {
      outputCol.isNull[batchIndex] = false;
      outputCol.isRepeating = false;
      byte[] outputbuf = myagg.estimator.serialize();
      outputCol.setRef(batchIndex, outputbuf, 0, outputbuf.length);
    }
  }
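
  // Per-group aggregation state: a lazily created, size-optimized HyperLogLog
  // estimator. FIXED_SIZE backs getAggregationBufferFixedSize() for memory
  // accounting; the estimator itself is only allocated on first use via
  // prepare().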
  static class Aggregation implements AggregationBuffer {

    private static final long FIXED_SIZE = HyperLogLog.builder().setSizeOptimized().build().lengthFor(null);

    HyperLogLog estimator;

    @Override
    public int getVariableSize() {
      throw new UnsupportedOperationException();
    }

    @Override
    public void reset() {
      estimator = null;
    }

    public void prepare() {
      if (estimator == null) {
        estimator = HyperLogLog.builder().setSizeOptimized().build();
      }
    }
  }
}