blob: 13f43bf7531b4a0c974384f35a5469c1f53c01fd [file] [log] [blame]
/*
* Copyright 2016, Yahoo! Inc.
* Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
*/
package com.yahoo.sketches.hive.tuple;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import com.yahoo.sketches.tuple.DoubleSummary;
import com.yahoo.sketches.tuple.DoubleSummaryDeserializer;
import com.yahoo.sketches.tuple.DoubleSummarySetOperations;
import com.yahoo.sketches.tuple.SummaryDeserializer;
import com.yahoo.sketches.tuple.SummaryFactory;
import com.yahoo.sketches.tuple.SummarySetOperations;
/**
* This simple implementation is to give an example of a concrete UDAF based on the abstract
* UnionSketchUDAF if no extra arguments are needed.. The same functionality is included into
* UnionDoubleSummaryWithModeSketchUDAF with the default summary mode of Sum, but the
* implementation is more complex because of the extra argument.
*/
@Description(
name = "UnionSketch",
value = "_FUNC_(sketch, nominal number of entries)",
extended = "Returns a Sketch<DoubleSummary> as a binary blob that can be operated on by other"
+ " tuple sketch related functions. The nominal number of entries is optional, must be a power of 2,"
+ " does not have to match the input sketches, and controls the relative error expected"
+ " from the sketch. A number of 16384 can be expected to yield errors of roughly +-1.5% in"
+ " the estimation of uniques. The default number is defined in the sketches-core library"
+ " and at the time of this writing was 4096 (about 3% error).")
public class UnionDoubleSummarySketchUDAF extends UnionSketchUDAF {
@Override
public GenericUDAFEvaluator createEvaluator() {
return new UnionDoubleSummarySketchEvaluator();
}
public static class UnionDoubleSummarySketchEvaluator extends UnionSketchEvaluator<DoubleSummary> {
private static final SummaryDeserializer<DoubleSummary> SUMMARY_DESERIALIZER =
new DoubleSummaryDeserializer();
private static final SummarySetOperations<DoubleSummary> SUMMARY_SET_OPS =
new DoubleSummarySetOperations();
@Override
protected SummaryDeserializer<DoubleSummary> getSummaryDeserializer() {
return SUMMARY_DESERIALIZER;
}
@Override
protected SummaryFactory<DoubleSummary> getSummaryFactory(final Object[] data) {
return null; // union never needs to create new instances
}
@Override
protected SummarySetOperations<DoubleSummary> getSummarySetOperationsForIterate(final Object[] data) {
return SUMMARY_SET_OPS;
}
@Override
protected SummarySetOperations<DoubleSummary> getSummarySetOperationsForMerge(final Object data) {
return SUMMARY_SET_OPS;
}
}
}