HAWQ-1767. Add TypeKind for aggregate function intermediate output
diff --git a/depends/dbcommon/src/dbcommon/common/vector.cc b/depends/dbcommon/src/dbcommon/common/vector.cc
index e453e83..0c96f21 100644
--- a/depends/dbcommon/src/dbcommon/common/vector.cc
+++ b/depends/dbcommon/src/dbcommon/common/vector.cc
@@ -125,6 +125,14 @@
case MAGMATID:
ret = std::unique_ptr<Vector>(new MagmaTidVector(ownData));
break;
+ case AVG_DOUBLE_TRANS_DATA_ID:  // avg() intermediate output, double flavour
+ ret = std::unique_ptr<Vector>(new StructVector(ownData));
+ ret->setTypeKind(AVG_DOUBLE_TRANS_DATA_ID);  // agg functions assert on this kind
+ break;
+ case AVG_DECIMAL_TRANS_DATA_ID:  // avg() intermediate output, decimal flavour
+ ret = std::unique_ptr<Vector>(new StructVector(ownData));
+ ret->setTypeKind(AVG_DECIMAL_TRANS_DATA_ID);  // agg functions assert on this kind
+ break;
default:
LOG_ERROR(ERRCODE_INTERNAL_ERROR, "Not supported type %d", type);
}
diff --git a/depends/dbcommon/src/dbcommon/common/vector/struct-vector.h b/depends/dbcommon/src/dbcommon/common/vector/struct-vector.h
index dbd5d43..c2c40b5 100644
--- a/depends/dbcommon/src/dbcommon/common/vector/struct-vector.h
+++ b/depends/dbcommon/src/dbcommon/common/vector/struct-vector.h
@@ -130,6 +130,7 @@
std::unique_ptr<Vector> cloneSelected(const SelectList *sel) const override {
std::unique_ptr<Vector> vec(new StructVector(false));
+ vec->setTypeKind(this->getTypeKind());
for (int i = 0; i < childs.size(); i++) {
vec->addChildVector(childs[i]->cloneSelected(sel));
}
diff --git a/depends/dbcommon/src/dbcommon/function/agg-func.cc b/depends/dbcommon/src/dbcommon/function/agg-func.cc
index e587da4..48cfc96 100644
--- a/depends/dbcommon/src/dbcommon/function/agg-func.cc
+++ b/depends/dbcommon/src/dbcommon/function/agg-func.cc
@@ -836,7 +836,8 @@
Object *para = DatumGetValue<Object *>(params[4]);
Vector *vec = reinterpret_cast<Vector *>(para);
- assert(dynamic_cast<Vector *>(para)->getTypeKind() == TypeKind::STRUCTID);
+ assert(dynamic_cast<Vector *>(para)->getTypeKind() ==
+ TypeKind::AVG_DOUBLE_TRANS_DATA_ID);
Accessor accessor = grpVals.getAccessor<Accessor>();
@@ -1144,7 +1145,8 @@
Object *para = DatumGetValue<Object *>(params[4]);
Vector *vec = reinterpret_cast<Vector *>(para);
- assert(dynamic_cast<Vector *>(para)->getTypeKind() == TypeKind::STRUCTID);
+ assert(dynamic_cast<Vector *>(para)->getTypeKind() ==
+ TypeKind::AVG_DECIMAL_TRANS_DATA_ID);
Accessor accessor = grpVals.getAccessor<Accessor>();
diff --git a/depends/dbcommon/src/dbcommon/function/func.cc b/depends/dbcommon/src/dbcommon/function/func.cc
index 4dbb6d2..8b3220d 100644
--- a/depends/dbcommon/src/dbcommon/function/func.cc
+++ b/depends/dbcommon/src/dbcommon/function/func.cc
@@ -131,39 +131,39 @@
// [[[end]]]
FuncEntryArray.push_back({AVG_TINYINT, "avg", DOUBLEID, {TINYINTID}});
- FuncEntryArray.push_back({AVG_TINYINT_ACCU, "avg_tinyint_accu", STRUCTID, {TINYINTID}, avg_int8_accu});
- FuncEntryArray.push_back({AVG_TINYINT_AMALG, "avg_tinyint_amalg", STRUCTID, {STRUCTID}, avg_int8_amalg});
- FuncEntryArray.push_back({AVG_TINYINT_AVG, "avg_tinyint_avg", DOUBLEID, {STRUCTID}, avg_double_avg});
+ FuncEntryArray.push_back({AVG_TINYINT_ACCU, "avg_tinyint_accu", AVG_DOUBLE_TRANS_DATA_ID, {TINYINTID}, avg_int8_accu});
+ FuncEntryArray.push_back({AVG_TINYINT_AMALG, "avg_tinyint_amalg", AVG_DOUBLE_TRANS_DATA_ID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_int8_amalg});
+ FuncEntryArray.push_back({AVG_TINYINT_AVG, "avg_tinyint_avg", DOUBLEID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_double_avg});
FuncEntryArray.push_back({AVG_SMALLINT, "avg", DOUBLEID, {SMALLINTID}});
- FuncEntryArray.push_back({AVG_SMALLINT_ACCU, "avg_smallint_accu", STRUCTID, {SMALLINTID}, avg_int16_accu});
- FuncEntryArray.push_back({AVG_SMALLINT_AMALG, "avg_smallint_amalg", STRUCTID, {STRUCTID}, avg_int16_amalg});
- FuncEntryArray.push_back({AVG_SMALLINT_AVG, "avg_smallint_avg", DOUBLEID, {STRUCTID}, avg_double_avg});
+ FuncEntryArray.push_back({AVG_SMALLINT_ACCU, "avg_smallint_accu", AVG_DOUBLE_TRANS_DATA_ID, {SMALLINTID}, avg_int16_accu});
+ FuncEntryArray.push_back({AVG_SMALLINT_AMALG, "avg_smallint_amalg", AVG_DOUBLE_TRANS_DATA_ID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_int16_amalg});
+ FuncEntryArray.push_back({AVG_SMALLINT_AVG, "avg_smallint_avg", DOUBLEID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_double_avg});
FuncEntryArray.push_back({AVG_INT, "avg", DOUBLEID, {INTID}});
- FuncEntryArray.push_back({AVG_INT_ACCU, "avg_int_accu", STRUCTID, {INTID}, avg_int32_accu});
- FuncEntryArray.push_back({AVG_INT_AMALG, "avg_int_amalg", STRUCTID, {STRUCTID}, avg_int32_amalg});
- FuncEntryArray.push_back({AVG_INT_AVG, "avg_int_avg", DOUBLEID, {STRUCTID}, avg_double_avg});
+ FuncEntryArray.push_back({AVG_INT_ACCU, "avg_int_accu", AVG_DOUBLE_TRANS_DATA_ID, {INTID}, avg_int32_accu});
+ FuncEntryArray.push_back({AVG_INT_AMALG, "avg_int_amalg", AVG_DOUBLE_TRANS_DATA_ID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_int32_amalg});
+ FuncEntryArray.push_back({AVG_INT_AVG, "avg_int_avg", DOUBLEID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_double_avg});
FuncEntryArray.push_back({AVG_BIGINT, "avg", DOUBLEID, {BIGINTID}});
- FuncEntryArray.push_back({AVG_BIGINT_ACCU, "avg_bigint_accu", STRUCTID, {BIGINTID}, avg_int64_accu});
- FuncEntryArray.push_back({AVG_BIGINT_AMALG, "avg_bigint_amalg", STRUCTID, {STRUCTID}, avg_int64_amalg});
- FuncEntryArray.push_back({AVG_BIGINT_AVG, "avg_bigint_avg", DOUBLEID, {STRUCTID}, avg_double_avg});
+ FuncEntryArray.push_back({AVG_BIGINT_ACCU, "avg_bigint_accu", AVG_DOUBLE_TRANS_DATA_ID, {BIGINTID}, avg_int64_accu});
+ FuncEntryArray.push_back({AVG_BIGINT_AMALG, "avg_bigint_amalg", AVG_DOUBLE_TRANS_DATA_ID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_int64_amalg});
+ FuncEntryArray.push_back({AVG_BIGINT_AVG, "avg_bigint_avg", DOUBLEID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_double_avg});
FuncEntryArray.push_back({AVG_FLOAT, "avg", DOUBLEID, {FLOATID}});
- FuncEntryArray.push_back({AVG_FLOAT_ACCU, "avg_float_accu", STRUCTID, {FLOATID}, avg_float_accu});
- FuncEntryArray.push_back({AVG_FLOAT_AMALG, "avg_float_amalg", STRUCTID, {STRUCTID}, avg_float_amalg});
- FuncEntryArray.push_back({AVG_FLOAT_AVG, "avg_float_avg", DOUBLEID, {STRUCTID}, avg_double_avg});
+ FuncEntryArray.push_back({AVG_FLOAT_ACCU, "avg_float_accu", AVG_DOUBLE_TRANS_DATA_ID, {FLOATID}, avg_float_accu});
+ FuncEntryArray.push_back({AVG_FLOAT_AMALG, "avg_float_amalg", AVG_DOUBLE_TRANS_DATA_ID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_float_amalg});
+ FuncEntryArray.push_back({AVG_FLOAT_AVG, "avg_float_avg", DOUBLEID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_double_avg});
FuncEntryArray.push_back({AVG_DOUBLE, "avg", DOUBLEID, {DOUBLEID}});
- FuncEntryArray.push_back({AVG_DOUBLE_ACCU, "avg_double_accu", STRUCTID, {DOUBLEID}, avg_double_accu});
- FuncEntryArray.push_back({AVG_DOUBLE_AMALG, "avg_double_amalg", STRUCTID, {STRUCTID}, avg_double_amalg});
- FuncEntryArray.push_back({AVG_DOUBLE_AVG, "avg_double_avg", DOUBLEID, {STRUCTID}, avg_double_avg});
+ FuncEntryArray.push_back({AVG_DOUBLE_ACCU, "avg_double_accu", AVG_DOUBLE_TRANS_DATA_ID, {DOUBLEID}, avg_double_accu});
+ FuncEntryArray.push_back({AVG_DOUBLE_AMALG, "avg_double_amalg", AVG_DOUBLE_TRANS_DATA_ID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_double_amalg});
+ FuncEntryArray.push_back({AVG_DOUBLE_AVG, "avg_double_avg", DOUBLEID, {AVG_DOUBLE_TRANS_DATA_ID}, avg_double_avg});
FuncEntryArray.push_back({AVG_DECIMAL, "avg", DECIMALNEWID, {DECIMALNEWID}});
- FuncEntryArray.push_back({AVG_DECIMAL_ACCU, "avg_decimal_accu", STRUCTID, {DECIMALNEWID}, avg_decimal_accu});
- FuncEntryArray.push_back({AVG_DECIMAL_AMALG, "avg_decimal_amalg", STRUCTID, {STRUCTID}, avg_decimal_amalg});
- FuncEntryArray.push_back({AVG_DECIMAL_AVG, "avg_decimal_avg", DECIMALNEWID, {STRUCTID}, avg_decimal_avg});
+ FuncEntryArray.push_back({AVG_DECIMAL_ACCU, "avg_decimal_accu", AVG_DECIMAL_TRANS_DATA_ID, {DECIMALNEWID}, avg_decimal_accu});
+ FuncEntryArray.push_back({AVG_DECIMAL_AMALG, "avg_decimal_amalg", AVG_DECIMAL_TRANS_DATA_ID, {AVG_DECIMAL_TRANS_DATA_ID}, avg_decimal_amalg});
+ FuncEntryArray.push_back({AVG_DECIMAL_AVG, "avg_decimal_avg", DECIMALNEWID, {AVG_DECIMAL_TRANS_DATA_ID}, avg_decimal_avg});
FuncEntryArray.push_back({SUM_TINYINT, "sum", BIGINTID, {TINYINTID}});
FuncEntryArray.push_back({SUM_TINYINT_SUM, "sum_tinyint_sum", BIGINTID, {BIGINTID}, sum_int8_sum});
diff --git a/depends/dbcommon/src/dbcommon/testutil/vector-utils.h b/depends/dbcommon/src/dbcommon/testutil/vector-utils.h
index 431c9f0..f5ca7e6 100644
--- a/depends/dbcommon/src/dbcommon/testutil/vector-utils.h
+++ b/depends/dbcommon/src/dbcommon/testutil/vector-utils.h
@@ -181,8 +181,9 @@
static std::unique_ptr<Vector> generateSelectStructVector(
std::vector<std::unique_ptr<Vector>> &vecs, // NOLINT
- const std::vector<bool> *nulls, SelectList *sel) {
+ const std::vector<bool> *nulls, SelectList *sel, TypeKind typekind) {
std::unique_ptr<dbcommon::Vector> result(new dbcommon::StructVector(false));
+ result->setTypeKind(typekind);
for (int i = 0; i < vecs.size(); i++) {
result->addChildVector(std::move(vecs[i]));
}
diff --git a/depends/dbcommon/src/dbcommon/type/type-kind.h b/depends/dbcommon/src/dbcommon/type/type-kind.h
index 8769a1b..f519dee 100644
--- a/depends/dbcommon/src/dbcommon/type/type-kind.h
+++ b/depends/dbcommon/src/dbcommon/type/type-kind.h
@@ -93,6 +93,8 @@
IOBASETYPEID = 2004, // base type
STRUCTEXID = 2005, // struct extension
MAGMATID = 2006, // magma tid
+ AVG_DOUBLE_TRANS_DATA_ID = 2007,  // avg intermediate output: {double sum, count}
+ AVG_DECIMAL_TRANS_DATA_ID = 2008,  // avg intermediate output: {decimal sum, count}
// 2100~2199 format(ps:xml not support yet)
diff --git a/depends/dbcommon/test/unit/function/test-agg-func-has-no-group-by.cc b/depends/dbcommon/test/unit/function/test-agg-func-has-no-group-by.cc
index 6ecc9e3..ac03bde 100644
--- a/depends/dbcommon/test/unit/function/test-agg-func-has-no-group-by.cc
+++ b/depends/dbcommon/test/unit/function/test-agg-func-has-no-group-by.cc
@@ -259,7 +259,10 @@
vecs.push_back(std::move(vecSum));
vecs.push_back(std::move(vecCount));
std::unique_ptr<dbcommon::Vector> vec =
- VectorUtility::generateSelectStructVector(vecs, nullptr, nullptr);
+ VectorUtility::generateSelectStructVector(
+ vecs, nullptr, nullptr,
+ (TypeKind::DECIMALNEWID == TK ? AVG_DECIMAL_TRANS_DATA_ID
+ : AVG_DOUBLE_TRANS_DATA_ID));
grpVals = generateAggGroupValues<T>(initGrpVals, true, true);
std::vector<uint64_t> hashGroups = {0, 0, 1, 1, 1};
@@ -294,7 +297,10 @@
vecs.push_back(std::move(vecSum));
vecs.push_back(std::move(vecCount));
std::unique_ptr<dbcommon::Vector> vec =
- VectorUtility::generateSelectStructVector(vecs, nullptr, nullptr);
+ VectorUtility::generateSelectStructVector(
+ vecs, nullptr, nullptr,
+ (TypeKind::DECIMALNEWID == TK ? AVG_DECIMAL_TRANS_DATA_ID
+ : AVG_DOUBLE_TRANS_DATA_ID));
std::vector<uint64_t> hashGroups = {0, 1, 1};
SelectList sel = {0, 3, 4};
diff --git a/depends/dbcommon/test/unit/function/test-agg-func-small-scale.cc b/depends/dbcommon/test/unit/function/test-agg-func-small-scale.cc
index 66825b5..fa99eda 100644
--- a/depends/dbcommon/test/unit/function/test-agg-func-small-scale.cc
+++ b/depends/dbcommon/test/unit/function/test-agg-func-small-scale.cc
@@ -248,7 +248,10 @@
vecs.push_back(std::move(vecSum));
vecs.push_back(std::move(vecCount));
std::unique_ptr<dbcommon::Vector> vec =
- VectorUtility::generateSelectStructVector(vecs, nullptr, nullptr);
+ VectorUtility::generateSelectStructVector(
+ vecs, nullptr, nullptr,
+ (TypeKind::DECIMALNEWID == TK ? AVG_DECIMAL_TRANS_DATA_ID
+ : AVG_DOUBLE_TRANS_DATA_ID));
grpVals = generateAggGroupValues<T>(initGrpVals, true, true);
std::vector<uint64_t> hashGroups = {0, 0, 1, 1, 1};
@@ -283,7 +286,10 @@
vecs.push_back(std::move(vecSum));
vecs.push_back(std::move(vecCount));
std::unique_ptr<dbcommon::Vector> vec =
- VectorUtility::generateSelectStructVector(vecs, nullptr, nullptr);
+ VectorUtility::generateSelectStructVector(
+ vecs, nullptr, nullptr,
+ (TypeKind::DECIMALNEWID == TK ? AVG_DECIMAL_TRANS_DATA_ID
+ : AVG_DOUBLE_TRANS_DATA_ID));
std::vector<uint64_t> hashGroups = {0, 1, 1};
SelectList sel = {0, 3, 4};
diff --git a/depends/dbcommon/test/unit/function/test-agg-func.cc b/depends/dbcommon/test/unit/function/test-agg-func.cc
index 20e88e5..b1b8e48 100644
--- a/depends/dbcommon/test/unit/function/test-agg-func.cc
+++ b/depends/dbcommon/test/unit/function/test-agg-func.cc
@@ -248,7 +248,10 @@
vecs.push_back(std::move(vecSum));
vecs.push_back(std::move(vecCount));
std::unique_ptr<dbcommon::Vector> vec =
- VectorUtility::generateSelectStructVector(vecs, nullptr, nullptr);
+ VectorUtility::generateSelectStructVector(
+ vecs, nullptr, nullptr,
+ (TypeKind::DECIMALNEWID == TK ? AVG_DECIMAL_TRANS_DATA_ID
+ : AVG_DOUBLE_TRANS_DATA_ID));
grpVals = generateAggGroupValues<T>(initGrpVals, true, true);
std::vector<uint64_t> hashGroups = {0, 0, 1, 1, 1};
@@ -283,7 +286,10 @@
vecs.push_back(std::move(vecSum));
vecs.push_back(std::move(vecCount));
std::unique_ptr<dbcommon::Vector> vec =
- VectorUtility::generateSelectStructVector(vecs, nullptr, nullptr);
+ VectorUtility::generateSelectStructVector(
+ vecs, nullptr, nullptr,
+ (TypeKind::DECIMALNEWID == TK ? AVG_DECIMAL_TRANS_DATA_ID
+ : AVG_DOUBLE_TRANS_DATA_ID));
std::vector<uint64_t> hashGroups = {0, 1, 1};
SelectList sel = {0, 3, 4};
diff --git a/depends/storage/src/storage/format/orc/orc-format-reader.cc b/depends/storage/src/storage/format/orc/orc-format-reader.cc
index 071623e..12a60c9 100644
--- a/depends/storage/src/storage/format/orc/orc-format-reader.cc
+++ b/depends/storage/src/storage/format/orc/orc-format-reader.cc
@@ -263,6 +263,46 @@
assert(lv->isValid());
break;
}
+ case orc::ORCTypeKind::STRUCT: {
+ // XXX(chiyang): struct columns are supported only as aggregate intermediate
+ // output, i.e. when an ORC file serves as a workfile.
+ orc::StructVectorBatch *structBatch =
+ dynamic_cast<orc::StructVectorBatch *>(b);  // NOTE(review): result is not null-checked
+ assert(structBatch->fields.size() == 2);  // {sum, count}; asserts are compiled out under NDEBUG
+ assert(structBatch->fields[0]->getType() == orc::ORCTypeKind::DOUBLE ||
+ structBatch->fields[0]->getType() == orc::ORCTypeKind::DECIMAL);
+ assert(structBatch->fields[1]->getType() == orc::ORCTypeKind::LONG);
+
+ bool isDecimal =
+ structBatch->fields[0]->getType() == orc::ORCTypeKind::DECIMAL;
+ auto vecSum = dbcommon::Vector::BuildVector(
+ isDecimal ? dbcommon::TypeKind::DECIMALID  // NOTE(review): func.cc uses DECIMALNEWID for avg - confirm
+ : dbcommon::TypeKind::DOUBLEID,
+ false);  // second argument is presumably ownData - TODO confirm
+ auto vecCount =
+ dbcommon::Vector::BuildVector(dbcommon::TypeKind::BIGINTID, false);
+ {
+ // Field 0: the running sum.
+ auto b0 = structBatch->fields[0];
+ vecSum->setValue(b0->getData(), b0->numElements * b0->getWidth());
+ vecSum->setHasNull(b0->hasNulls);
+ if (b0->hasNulls)
+ vecSum->setNotNulls(b0->getNotNull(), b0->numElements);
+ }
+ {
+ // Field 1: the running count.
+ auto b1 = structBatch->fields[1];
+ vecCount->setValue(b1->getData(), b1->numElements * b1->getWidth());
+ vecCount->setHasNull(b1->hasNulls);
+ if (b1->hasNulls)
+ vecCount->setNotNulls(b1->getNotNull(), b1->numElements);
+ }
+
+ v->addChildVector(std::move(vecSum));
+ v->addChildVector(std::move(vecCount));
+ v->setTypeKind(isDecimal  // tag the struct with the kind the avg functions assert on
+ ? dbcommon::TypeKind::AVG_DECIMAL_TRANS_DATA_ID
+ : dbcommon::TypeKind::AVG_DOUBLE_TRANS_DATA_ID);
+ break;
+ }
default:
LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED, "type %d not supported yet",
b->getType());
diff --git a/depends/storage/src/storage/format/orc/orc-format-writer.cc b/depends/storage/src/storage/format/orc/orc-format-writer.cc
index c5fd10d..e59d636 100644
--- a/depends/storage/src/storage/format/orc/orc-format-writer.cc
+++ b/depends/storage/src/storage/format/orc/orc-format-writer.cc
@@ -195,6 +195,22 @@
child->addStructField(name, std::move(grandchild));
ret->addStructField(name, std::move(child));
break;
+ case dbcommon::TypeKind::AVG_DOUBLE_TRANS_DATA_ID:  // written as struct{DOUBLE, LONG}
+ child.reset(new orc::TypeImpl(orc::ORCTypeKind::STRUCT));
+ grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::DOUBLE));
+ child->addStructField(name, std::move(grandchild));  // field 0: sum
+ grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::LONG));
+ child->addStructField(name, std::move(grandchild));  // field 1: count; NOTE(review): same name as field 0
+ ret->addStructField(name, std::move(child));
+ break;
+ case dbcommon::TypeKind::AVG_DECIMAL_TRANS_DATA_ID:  // written as struct{DECIMAL, LONG}
+ child.reset(new orc::TypeImpl(orc::ORCTypeKind::STRUCT));
+ grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::DECIMAL));
+ child->addStructField(name, std::move(grandchild));  // field 0: sum
+ grandchild.reset(new orc::TypeImpl(orc::ORCTypeKind::LONG));
+ child->addStructField(name, std::move(grandchild));  // field 1: count; NOTE(review): same name as field 0
+ ret->addStructField(name, std::move(child));
+ break;
default:
LOG_ERROR(ERRCODE_FEATURE_NOT_SUPPORTED,
"type not supported for orc: %d", t);
diff --git a/depends/storage/src/storage/format/orc/vector.h b/depends/storage/src/storage/format/orc/vector.h
index d88518c..8d3650e 100644
--- a/depends/storage/src/storage/format/orc/vector.h
+++ b/depends/storage/src/storage/format/orc/vector.h
@@ -340,8 +340,7 @@
}
std::unique_ptr<dbcommon::Vector> buildVector() override {
- LOG_ERROR(ERRCODE_INTERNAL_ERROR,
- "not implemented buildVector for StructVectorBatch");
+ return dbcommon::Vector::BuildVector(dbcommon::TypeKind::STRUCTID, false);
}
};
diff --git a/depends/storage/test/unit/format/test-orc-vector.cc b/depends/storage/test/unit/format/test-orc-vector.cc
index 7c89a7c..0491b84 100644
--- a/depends/storage/test/unit/format/test-orc-vector.cc
+++ b/depends/storage/test/unit/format/test-orc-vector.cc
@@ -66,8 +66,6 @@
EXPECT_THROW(vec.getWidth(), dbcommon::TransactionAbortException);
EXPECT_EQ(vec.getType(), ORCTypeKind::STRUCT);
EXPECT_THROW(vec.getData(), dbcommon::TransactionAbortException);
- EXPECT_THROW(vec.buildVector(), dbcommon::TransactionAbortException);
-
EXPECT_EQ(vec.hasVariableLength(), false);
EXPECT_EQ(vec.toString(),