blob: d3967f51fcb7fc070df43ec6d550942d306e8d0e [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.integration.tests;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.commons.lang.StringUtils;
import org.apache.pinot.spi.utils.JsonUtils;
/**
* The <code>QueryGenerator</code> class is used to generate random equivalent Pinot and H2 query pairs based on Avro
* files.
* <ul>
* <li>
* Supports COMPARISON, IN and BETWEEN predicate for both single-value and multi-value columns.
* <ul>
* <li>For multi-value columns, does not support NOT EQUAL and NOT IN.</li>
* <li>The reason for this restriction is that number of elements in multi-value columns is not fixed.</li>
* </ul>
* </li>
* <li>Supports single-value data type: BOOLEAN, INT, LONG, FLOAT, DOUBLE, STRING.</li>
* <li>Supports multi-value data type: INT, LONG, FLOAT, DOUBLE, STRING.</li>
* <li>
* Supports aggregation function: SUM, MIN, MAX, AVG, COUNT, DISTINCTCOUNT.
* <ul>
* <li>SUM, MIN, MAX, AVG can only work on numeric single-value columns.</li>
* <li>COUNT, DISTINCTCOUNT can work on any single-value columns.</li>
* </ul>
* </li>
* </ul>
*/
public class QueryGenerator {
// Configurable variables.
private static final int MAX_NUM_SELECTION_COLUMNS = 3;
private static final int MAX_NUM_AGGREGATION_COLUMNS = 3;
private static final int MAX_NUM_ORDER_BY_COLUMNS = 3;
private static final int MAX_NUM_GROUP_BY_COLUMNS = 3;
private static final int MAX_NUM_PREDICATES = 3;
private static final int MAX_NUM_IN_CLAUSE_VALUES = 5;
private static final int MAX_RESULT_LIMIT = 30;
private static final int MAX_INT_FOR_HAVING_CLAUSE_PREDICATE = 1000;
private static final float MAX_FLOAT_FOR_HAVING_CLAUSE_PREDICATE = 10000;
private static final List<String> BOOLEAN_OPERATORS = Arrays.asList("OR", "AND");
private static final List<String> COMPARISON_OPERATORS = Arrays.asList("=", "<>", "<", ">", "<=", ">=");
private static final List<String> AGGREGATION_FUNCTIONS =
Arrays.asList("SUM", "MIN", "MAX", "AVG", "COUNT", "DISTINCTCOUNT");
private static final List<String> MULTI_STAGE_AGGREGATION_FUNCTIONS =
Arrays.asList("SUM", "MIN", "MAX", "AVG", "COUNT");
private static final Random RANDOM = new Random();
private final Map<String, Set<String>> _columnToValueSet = new HashMap<>();
private final Map<String, List<String>> _columnToValueList = new HashMap<>();
private final List<String> _columnNames = new ArrayList<>();
private final List<String> _singleValueColumnNames = new ArrayList<>();
private final List<String> _singleValueNumericalColumnNames = new ArrayList<>();
private final Map<String, Integer> _multiValueColumnMaxNumElements = new HashMap<>();
private final List<QueryGenerationStrategy> _queryGenerationStrategies =
Arrays.asList(new SelectionQueryGenerationStrategy(), new AggregationQueryGenerationStrategy());
private final List<PredicateGenerator> _singleValuePredicateGenerators =
Arrays.asList(new SingleValueComparisonPredicateGenerator(), new SingleValueInPredicateGenerator(),
new SingleValueBetweenPredicateGenerator(), new SingleValueRegexPredicateGenerator());
private final List<PredicateGenerator> _multistageSingleValuePredicateGenerators =
Arrays.asList(new SingleValueComparisonPredicateGenerator(), new SingleValueInPredicateGenerator(),
new SingleValueBetweenPredicateGenerator());
private final List<PredicateGenerator> _multiValuePredicateGenerators =
Arrays.asList(new MultiValueComparisonPredicateGenerator(), new MultiValueInPredicateGenerator(),
new MultiValueBetweenPredicateGenerator());
private final String _pinotTableName;
private final String _h2TableName;
private boolean _skipMultiValuePredicates = false;
private boolean _useMultistageEngine = false;
/**
* Constructor for <code>QueryGenerator</code>.
*
* @param avroFiles list of Avro files.
* @param pinotTableName Pinot table name.
* @param h2TableName H2 table name.
*/
public QueryGenerator(List<File> avroFiles, String pinotTableName, String h2TableName) {
_pinotTableName = pinotTableName;
_h2TableName = h2TableName;
// Read Avro schema and initialize storage.
File schemaAvroFile = avroFiles.get(0);
GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
try (DataFileReader<GenericRecord> fileReader = new DataFileReader<>(schemaAvroFile, datumReader)) {
Schema schema = fileReader.getSchema();
for (Schema.Field field : schema.getFields()) {
String fieldName = field.name();
Schema fieldSchema = field.schema();
Schema.Type fieldType = fieldSchema.getType();
switch (fieldType) {
case UNION:
_columnNames.add(fieldName);
_columnToValueSet.put(fieldName, new HashSet<>());
Schema.Type type = fieldSchema.getTypes().get(0).getType();
if (type == Schema.Type.ARRAY) {
_multiValueColumnMaxNumElements.put(fieldName, 0);
} else {
_singleValueColumnNames.add(fieldName);
if (type != Schema.Type.STRING && type != Schema.Type.BOOLEAN) {
_singleValueNumericalColumnNames.add(fieldName);
}
}
break;
case ARRAY:
_columnNames.add(fieldName);
_columnToValueSet.put(fieldName, new HashSet<>());
_multiValueColumnMaxNumElements.put(fieldName, 0);
break;
case INT:
case LONG:
case FLOAT:
case DOUBLE:
_columnNames.add(fieldName);
_columnToValueSet.put(fieldName, new HashSet<>());
_singleValueColumnNames.add(fieldName);
_singleValueNumericalColumnNames.add(fieldName);
break;
case BOOLEAN:
case STRING:
_columnNames.add(fieldName);
_columnToValueSet.put(fieldName, new HashSet<>());
_singleValueColumnNames.add(fieldName);
break;
default:
break;
}
}
} catch (Exception e) {
throw new RuntimeException(e);
}
// Load Avro data into storage.
for (File avroFile : avroFiles) {
addAvroData(avroFile);
}
// Ignore multi-value columns with too many elements.
prepareToGenerateQueries();
}
/**
* Helper method to store an Avro value into the valid SQL String value set.
*
* @param valueSet value set.
* @param avroValue Avro value.
*/
private static void storeAvroValueIntoValueSet(Set<String> valueSet, Object avroValue) {
if (avroValue instanceof Number) {
// For Number object, store raw value.
valueSet.add(avroValue.toString());
} else {
// For non-Number object, escape single quote.
valueSet.add("'" + avroValue.toString().replace("'", "''") + "'");
}
}
/**
* Helper method to join several {@link String} elements with ' '.
*
* @param elements elements to be joined.
* @return joined result.
*/
private static String joinWithSpaces(String... elements) {
StringBuilder stringBuilder = new StringBuilder();
for (String element : elements) {
if (!element.isEmpty()) {
stringBuilder.append(element).append(' ');
}
}
return stringBuilder.substring(0, stringBuilder.length() - 1);
}
/**
* Sample main class for the query generator.
*
* @param args arguments.
*/
public static void main(String[] args)
throws Exception {
File avroFile = new File("pinot-integration-tests/src/test/resources/On_Time_On_Time_Performance_2014_1.avro");
QueryGenerator queryGenerator = new QueryGenerator(Collections.singletonList(avroFile), "mytable", "mytable");
File outputFile = new File(
"pinot-integration-tests/src/test/resources/On_Time_On_Time_Performance_2014_100k_subset.test_queries_10K.sql");
try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) {
for (int i = 0; i < 10000; i++) {
Query query = queryGenerator.generateQuery();
ObjectNode queryJson = JsonUtils.newObjectNode();
queryJson.put("sql", query.generatePinotQuery());
queryJson.set("hsqls", JsonUtils.objectToJsonNode(query.generateH2Query()));
writer.write(queryJson.toString());
writer.newLine();
}
}
}
/**
* Helper method to read in an Avro file and add data to the storage.
*
* @param avroFile Avro file.
*/
private void addAvroData(File avroFile) {
// Read in records and update the values stored.
GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
try (DataFileReader<GenericRecord> fileReader = new DataFileReader<>(avroFile, datumReader)) {
for (GenericRecord genericRecord : fileReader) {
for (String columnName : _columnNames) {
Set<String> values = _columnToValueSet.get(columnName);
// Turn the Avro value into a valid SQL String token.
Object avroValue = genericRecord.get(columnName);
if (avroValue != null) {
Integer storedMaxNumElements = _multiValueColumnMaxNumElements.get(columnName);
if (storedMaxNumElements != null) {
// Multi-value column
GenericData.Array array = (GenericData.Array) avroValue;
int numElements = array.size();
if (storedMaxNumElements < numElements) {
_multiValueColumnMaxNumElements.put(columnName, numElements);
}
for (Object element : array) {
storeAvroValueIntoValueSet(values, element);
}
} else {
// Single-value column
storeAvroValueIntoValueSet(values, avroValue);
}
}
}
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* Helper method to finish initialization of the <code>QueryGenerator</code>, removing multi-value columns with too
* many elements and dumping storage into the final map from column name to list of column values.
* <p>Called after all Avro data loaded.
*/
private void prepareToGenerateQueries() {
Iterator<String> columnNameIterator = _columnNames.iterator();
while (columnNameIterator.hasNext()) {
String columnName = columnNameIterator.next();
// Remove multi-value columns with more than MAX_NUM_ELEMENTS_IN_MULTI_VALUE_TO_COMPARE elements.
Integer maxNumElements = _multiValueColumnMaxNumElements.get(columnName);
if (maxNumElements != null
&& maxNumElements > ClusterIntegrationTestUtils.MAX_NUM_ELEMENTS_IN_MULTI_VALUE_TO_COMPARE) {
columnNameIterator.remove();
_multiValueColumnMaxNumElements.remove(columnName);
} else {
_columnToValueList.put(columnName, new ArrayList<>(_columnToValueSet.get(columnName)));
}
}
// Free the other copy of the data.
_columnToValueSet.clear();
}
/**
* Set whether to skip predicates on multi-value columns.
*
* @param skipMultiValuePredicates whether to skip predicates on multi-value columns.
*/
public void setSkipMultiValuePredicates(boolean skipMultiValuePredicates) {
_skipMultiValuePredicates = skipMultiValuePredicates;
}
public void setUseMultistageEngine(boolean useMultistageEngine) {
_useMultistageEngine = useMultistageEngine;
}
/**
* Helper method to pick a random value from the values list.
*
* @param list values list.
* @param <T> type of the value.
* @return randomly picked value.
*/
private <T> T pickRandom(List<T> list) {
return list.get(RANDOM.nextInt(list.size()));
}
private List<PredicateGenerator> getSingleValuePredicateGenerators() {
return _useMultistageEngine ? _multistageSingleValuePredicateGenerators : _singleValuePredicateGenerators;
}
/**
* Generate one selection or aggregation query.
*
* @return generated query.
*/
public Query generateQuery() {
return pickRandom(_queryGenerationStrategies).generateQuery();
}
/**
* Helper method to generate a predicate query fragment.
*
* @return generated predicate query fragment.
*/
private PredicateQueryFragment generatePredicate() {
// Generate at most MAX_NUM_PREDICATES predicates.
int predicateCount = RANDOM.nextInt(MAX_NUM_PREDICATES + 1);
List<QueryFragment> predicates = new ArrayList<>(predicateCount);
while (predicates.size() < predicateCount) {
String columnName = pickRandom(_columnNames);
if (!_columnToValueList.get(columnName).isEmpty()) {
if (!_multiValueColumnMaxNumElements.containsKey(columnName)) {
// Single-value column.
predicates.add(pickRandom(getSingleValuePredicateGenerators()).generatePredicate(columnName));
} else if (!_skipMultiValuePredicates) {
// Multi-value column.
predicates.add(pickRandom(_multiValuePredicateGenerators).generatePredicate(columnName));
}
}
}
if (predicateCount < 2) {
// No need to join.
return new PredicateQueryFragment(predicates, Collections.emptyList());
} else {
// Join predicates with ANDs and ORs.
List<QueryFragment> operators = new ArrayList<>(predicateCount - 1);
for (int i = 1; i < predicateCount; i++) {
operators.add(new StringQueryFragment(pickRandom(BOOLEAN_OPERATORS)));
}
return new PredicateQueryFragment(predicates, operators);
}
}
public interface Query {
String generatePinotQuery();
String generateH2Query();
}
private interface QueryFragment {
String generatePinotQuery();
String generateH2Query();
}
/**
* Query generation strategy interface with capability of generating query using specific strategy.
*/
private interface QueryGenerationStrategy {
/**
* Generate a query using specific strategy.
*
* @return generated query.
*/
Query generateQuery();
}
/**
* Predicate generator interface with capability of generating a predicate query fragment on a column.
*/
private interface PredicateGenerator {
/**
* Generate a predicate query fragment on a column.
*
* @param columnName column name.
* @return generated predicate query fragment.
*/
QueryFragment generatePredicate(String columnName);
}
/**
* Selection query.
*/
private class SelectionQuery implements Query {
final List<String> _projectionColumns;
final PredicateQueryFragment _predicate;
final OrderByQueryFragment _orderBy;
final LimitQueryFragment _limit;
/**
* Constructor for <code>SelectionQuery</code>.
*
* @param projectionColumns projection columns.
* @param orderBy order by fragment.
* @param predicate predicate fragment.
* @param limit limit fragment.
*/
public SelectionQuery(List<String> projectionColumns, PredicateQueryFragment predicate,
OrderByQueryFragment orderBy, LimitQueryFragment limit) {
_projectionColumns = projectionColumns;
_orderBy = orderBy;
_predicate = predicate;
_limit = limit;
}
@Override
public String generatePinotQuery() {
return joinWithSpaces("SELECT", StringUtils.join(_projectionColumns, ", "), "FROM", _pinotTableName,
_predicate.generatePinotQuery(), _orderBy.generatePinotQuery(), _limit.generatePinotQuery());
}
@Override
public String generateH2Query() {
List<String> h2ProjectionColumns = new ArrayList<>();
for (String projectionColumn : _projectionColumns) {
if (_multiValueColumnMaxNumElements.containsKey(projectionColumn)) {
// Multi-value column.
for (int i = 0; i < ClusterIntegrationTestUtils.MAX_NUM_ELEMENTS_IN_MULTI_VALUE_TO_COMPARE; i++) {
h2ProjectionColumns.add(projectionColumn + "__MV" + i);
}
} else {
// Single-value column.
h2ProjectionColumns.add(projectionColumn);
}
}
return joinWithSpaces("SELECT", StringUtils.join(h2ProjectionColumns, ", "), "FROM", _h2TableName,
_predicate.generateH2Query(), _orderBy.generateH2Query(), _limit.generateH2Query());
}
}
/**
* Aggregation query.
*/
private class AggregationQuery implements Query {
final List<String> _aggregateColumnsAndFunctions;
final PredicateQueryFragment _predicate;
final HavingQueryFragment _havingPredicate;
final Set<String> _groupColumns;
final LimitQueryFragment _limit;
/**
* Constructor for <code>AggregationQuery</code>.
*
* @param aggregateColumnsAndFunctions aggregation functions.
* @param predicate predicate fragment.
* @param groupColumns group-by columns.
* @param havingPredicate having predicate fragment.
* @param limit limit fragment.
*/
public AggregationQuery(List<String> aggregateColumnsAndFunctions, PredicateQueryFragment predicate,
Set<String> groupColumns, HavingQueryFragment havingPredicate, LimitQueryFragment limit) {
_aggregateColumnsAndFunctions = aggregateColumnsAndFunctions;
_predicate = predicate;
_groupColumns = groupColumns;
_havingPredicate = havingPredicate;
_limit = limit;
}
@Override
public String generatePinotQuery() {
if (_groupColumns.isEmpty()) {
return joinWithSpaces("SELECT", StringUtils.join(_aggregateColumnsAndFunctions, ", "), "FROM", _pinotTableName,
_predicate.generatePinotQuery());
} else {
return joinWithSpaces("SELECT", StringUtils.join(_aggregateColumnsAndFunctions, ", "), "FROM", _pinotTableName,
_predicate.generatePinotQuery(), "GROUP BY", StringUtils.join(_groupColumns, ", "),
_havingPredicate.generatePinotQuery(), _limit.generatePinotQuery());
}
}
@Override
public String generateH2Query() {
List<String> h2AggregateColumnAndFunctions = new ArrayList<>();
for (String aggregateColumnAndFunction : _aggregateColumnsAndFunctions) {
String h2AggregateColumnAndFunction;
// Make 'AVG' and
if (aggregateColumnAndFunction.startsWith("DISTINCTCOUNT(")) {
// make 'DISTINCTCOUNT(..)' compatible with H2 SQL query using 'COUNT(DISTINCT(..)'
h2AggregateColumnAndFunction = aggregateColumnAndFunction.replace("DISTINCTCOUNT(", "COUNT(DISTINCT ");
} else if (AGGREGATION_FUNCTIONS.contains(aggregateColumnAndFunction.substring(0, 3))) {
// make AGG functions (SUM, MIN, MAX, AVG) compatible with H2 SQL query.
// this is because Pinot queries casts all to double before doing aggregation
String aggFunctionName = aggregateColumnAndFunction.substring(0, 3);
h2AggregateColumnAndFunction = aggregateColumnAndFunction
.replace(aggFunctionName + "(", aggFunctionName + "(CAST(")
.replace(")", " AS DOUBLE))");
} else {
h2AggregateColumnAndFunction = aggregateColumnAndFunction;
}
h2AggregateColumnAndFunctions.add(h2AggregateColumnAndFunction);
}
if (_groupColumns.isEmpty()) {
return joinWithSpaces("SELECT", StringUtils.join(h2AggregateColumnAndFunctions, ", "), "FROM", _h2TableName,
_predicate.generateH2Query());
} else {
return joinWithSpaces("SELECT", StringUtils.join(h2AggregateColumnAndFunctions, ", "), "FROM", _h2TableName,
_predicate.generateH2Query(), "GROUP BY", StringUtils.join(_groupColumns, ", "),
_havingPredicate.generateH2Query(), _limit.generateH2Query());
}
}
}
/**
* Most basic query fragment.
*/
private static class StringQueryFragment implements QueryFragment {
final String _pinotQuery;
final String _h2Query;
/**
* Constructor with same Pinot and H2 query fragment.
*/
StringQueryFragment(String query) {
_pinotQuery = query;
_h2Query = query;
}
/**
* Constructor for <code>StringQueryFragment</code> with different Pinot and H2 query fragment.
*/
StringQueryFragment(String pinotQuery, String h2Query) {
_pinotQuery = pinotQuery;
_h2Query = h2Query;
}
@Override
public String generatePinotQuery() {
return _pinotQuery;
}
@Override
public String generateH2Query() {
return _h2Query;
}
}
/**
* Limit query fragment for selection queries.
* <ul>
* <li>SELECT ... FROM ... WHERE ... 'LIMIT ...'</li>
* </ul>
*/
private static class LimitQueryFragment extends StringQueryFragment {
LimitQueryFragment(int limit) {
// When limit is MAX_RESULT_LIMIT, construct query without LIMIT.
super(limit == MAX_RESULT_LIMIT ? "" : "LIMIT " + limit,
"LIMIT " + ClusterIntegrationTestUtils.MAX_NUM_ROWS_TO_COMPARE);
}
}
/**
* Order by query fragment for aggregation queries.
* <ul>
* <li>SELECT ... FROM ... WHERE ... 'ORDER BY ...'</li>
* </ul>
*/
private static class OrderByQueryFragment extends StringQueryFragment {
OrderByQueryFragment(Set<String> columns) {
super(columns.isEmpty() ? "" : "ORDER BY " + StringUtils.join(columns, ", "));
}
}
/**
* Predicate query fragment.
* <ul>
* <li>SELECT ... FROM ... 'WHERE ...'</li>
* </ul>
*/
private static class PredicateQueryFragment implements QueryFragment {
final List<QueryFragment> _predicates;
final List<QueryFragment> _operators;
/**
* Constructor for <code>PredicateQueryFragment</code>.
*
* @param predicates predicates.
* @param operators operators between predicates.
*/
PredicateQueryFragment(List<QueryFragment> predicates, List<QueryFragment> operators) {
_predicates = predicates;
_operators = operators;
}
@Override
public String generatePinotQuery() {
if (_predicates.isEmpty()) {
return "";
} else {
StringBuilder pinotQuery = new StringBuilder("WHERE ");
// One less than the number of predicates.
int operatorCount = _operators.size();
for (int i = 0; i < operatorCount; i++) {
pinotQuery.append(_predicates.get(i).generatePinotQuery()).append(' ')
.append(_operators.get(i).generatePinotQuery()).append(' ');
}
pinotQuery.append(_predicates.get(operatorCount).generatePinotQuery());
return pinotQuery.toString();
}
}
@Override
public String generateH2Query() {
if (_predicates.isEmpty()) {
return "";
} else {
StringBuilder h2Query = new StringBuilder("WHERE ");
// One less than the number of predicates.
int operatorCount = _operators.size();
for (int i = 0; i < operatorCount; i++) {
h2Query.append(_predicates.get(i).generateH2Query()).append(' ').append(_operators.get(i).generateH2Query())
.append(' ');
}
h2Query.append(_predicates.get(operatorCount).generateH2Query());
return h2Query.toString();
}
}
}
/**
* Having query fragment.
* <ul>
* <li>SELECT ... FROM ... WHERE ... GROUP BY ... 'HAVING ...'</li>
* </ul>
*/
private static class HavingQueryFragment implements QueryFragment {
final List<String> _havingClauseAggregationFunctions;
final List<String> _havingClauseOperatorsAndValues;
final List<String> _havingClauseBooleanOperators;
HavingQueryFragment(List<String> havingClauseAggregationFunctions, List<String> havingClauseOperatorsAndValues,
List<String> havingClauseBooleanOperators) {
_havingClauseAggregationFunctions = havingClauseAggregationFunctions;
_havingClauseOperatorsAndValues = havingClauseOperatorsAndValues;
_havingClauseBooleanOperators = havingClauseBooleanOperators;
}
@Override
public String generatePinotQuery() {
String pinotQuery = "";
int aggregationFunctionCount = _havingClauseAggregationFunctions.size();
if (aggregationFunctionCount > 0) {
pinotQuery += "HAVING";
pinotQuery = joinWithSpaces(pinotQuery, _havingClauseAggregationFunctions.get(0));
pinotQuery = joinWithSpaces(pinotQuery, _havingClauseOperatorsAndValues.get(0));
for (int i = 1; i < aggregationFunctionCount; i++) {
pinotQuery = joinWithSpaces(pinotQuery, _havingClauseBooleanOperators.get(i - 1));
pinotQuery = joinWithSpaces(pinotQuery, _havingClauseAggregationFunctions.get(i));
pinotQuery = joinWithSpaces(pinotQuery, _havingClauseOperatorsAndValues.get(i));
}
}
return pinotQuery;
}
@Override
public String generateH2Query() {
String h2Query = "";
int aggregationFunctionCount = _havingClauseAggregationFunctions.size();
if (aggregationFunctionCount > 0) {
h2Query += "HAVING";
String aggregationFunction = _havingClauseAggregationFunctions.get(0);
if (aggregationFunction.startsWith("AVG(")) {
aggregationFunction = aggregationFunction.replace("AVG(", "AVG(CAST(").replace(")", " AS DOUBLE))");
} else if (aggregationFunction.startsWith("DISTINCTCOUNT(")) {
aggregationFunction = aggregationFunction.replace("DISTINCTCOUNT(", "COUNT(DISTINCT ");
}
h2Query = joinWithSpaces(h2Query, aggregationFunction);
h2Query = joinWithSpaces(h2Query, _havingClauseOperatorsAndValues.get(0));
for (int i = 1; i < aggregationFunctionCount; i++) {
aggregationFunction = _havingClauseAggregationFunctions.get(i);
if (aggregationFunction.startsWith("AVG(")) {
aggregationFunction = aggregationFunction.replace("AVG(", "AVG(CAST(").replace(")", " AS DOUBLE))");
} else if (aggregationFunction.startsWith("DISTINCTCOUNT(")) {
aggregationFunction = aggregationFunction.replace("DISTINCTCOUNT(", "COUNT(DISTINCT ");
}
h2Query = joinWithSpaces(h2Query, _havingClauseBooleanOperators.get(i - 1));
h2Query = joinWithSpaces(h2Query, aggregationFunction);
h2Query = joinWithSpaces(h2Query, _havingClauseOperatorsAndValues.get(i));
}
}
return h2Query;
}
}
/**
* Strategy to generate selection queries.
* <ul>
* <li>SELECT a, b FROM table WHERE a = 'foo' AND b = 'bar' ORDER BY c LIMIT 10</li>
* </ul>
*/
private class SelectionQueryGenerationStrategy implements QueryGenerationStrategy {
@Override
public Query generateQuery() {
// Select at most MAX_NUM_SELECTION_COLUMNS columns.
int projectionColumnCount = Math.min(RANDOM.nextInt(MAX_NUM_SELECTION_COLUMNS) + 1, _columnNames.size());
if (_useMultistageEngine) {
projectionColumnCount = Math.min(projectionColumnCount, _singleValueColumnNames.size());
}
Set<String> projectionColumns = new HashSet<>();
while (projectionColumns.size() < projectionColumnCount) {
projectionColumns.add(pickRandom(_useMultistageEngine ? _singleValueColumnNames : _columnNames));
}
// Select at most MAX_NUM_ORDER_BY_COLUMNS columns for ORDER BY clause.
int orderByColumnCount = Math.min(RANDOM.nextInt(MAX_NUM_ORDER_BY_COLUMNS + 1), _singleValueColumnNames.size());
Set<String> orderByColumns = new HashSet<>();
while (orderByColumns.size() < orderByColumnCount) {
orderByColumns.add(pickRandom(_singleValueColumnNames));
}
// Generate a predicate.
PredicateQueryFragment predicate = generatePredicate();
// Generate a result limit of at most MAX_RESULT_LIMIT columns for ORDER BY clause.
int resultLimit = RANDOM.nextInt(MAX_RESULT_LIMIT + 1);
LimitQueryFragment limit = new LimitQueryFragment(resultLimit);
return new SelectionQuery(new ArrayList<>(projectionColumns), predicate, new OrderByQueryFragment(orderByColumns),
limit);
}
}
/**
* Strategy to generate aggregation queries.
* <ul>
* <li>SELECT SUM(a), MAX(b) FROM table WHERE a = 'foo' AND b = 'bar' GROUP BY c TOP 10</li>
* </ul>
*/
private class AggregationQueryGenerationStrategy implements QueryGenerationStrategy {
@Override
public Query generateQuery() {
// Generate at most MAX_NUM_AGGREGATION_COLUMNS columns on which to aggregate, map 0 to 'COUNT(*)'.
int aggregationColumnCount = RANDOM.nextInt(MAX_NUM_AGGREGATION_COLUMNS + 1);
Set<String> aggregationColumnsAndFunctions = new HashSet<>();
if (aggregationColumnCount == 0) {
aggregationColumnsAndFunctions.add("COUNT(*)");
} else {
while (aggregationColumnsAndFunctions.size() < aggregationColumnCount) {
aggregationColumnsAndFunctions.add(createRandomAggregationFunction());
}
}
// Generate a predicate.
PredicateQueryFragment predicate = generatePredicate();
// Generate at most MAX_NUM_GROUP_BY_COLUMNS columns on which to group.
int groupColumnCount = Math.min(RANDOM.nextInt(MAX_NUM_GROUP_BY_COLUMNS + 1), _singleValueColumnNames.size());
Set<String> groupColumns = new HashSet<>();
while (groupColumns.size() < groupColumnCount) {
groupColumns.add(pickRandom(_singleValueColumnNames));
}
//Generate a HAVING predicate
ArrayList<String> arrayOfAggregationColumnsAndFunctions = new ArrayList<>(aggregationColumnsAndFunctions);
HavingQueryFragment havingPredicate = generateHavingPredicate(arrayOfAggregationColumnsAndFunctions);
// Generate a result limit of at most MAX_RESULT_LIMIT.
LimitQueryFragment limit = new LimitQueryFragment(RANDOM.nextInt(MAX_RESULT_LIMIT + 1));
return new AggregationQuery(arrayOfAggregationColumnsAndFunctions, predicate, groupColumns, havingPredicate,
limit);
}
/**
* Helper method to generate a having predicate query fragment.
*
* @return generated predicate query fragment.
*/
private HavingQueryFragment generateHavingPredicate(ArrayList<String> arrayOfAggregationColumnsAndFunctions) {
//Generate a HAVING clause for group by query
List<String> havingClauseAggregationFunctions = new ArrayList<>();
List<String> havingClauseOperatorsAndValues = new ArrayList<>();
List<String> havingClauseBooleanOperators = new ArrayList<>();
createHavingClause(arrayOfAggregationColumnsAndFunctions, havingClauseAggregationFunctions,
havingClauseOperatorsAndValues, havingClauseBooleanOperators);
return new HavingQueryFragment(havingClauseAggregationFunctions, havingClauseOperatorsAndValues,
havingClauseBooleanOperators);
}
private void createHavingClause(ArrayList<String> arrayOfAggregationColumnsAndFunctions,
List<String> havingClauseAggregationFunctions, List<String> havingClauseOperatorsAndValues,
List<String> havingClauseBooleanOperators) {
int numOfFunctionsInSelectList = arrayOfAggregationColumnsAndFunctions.size();
int aggregationFunctionCount = RANDOM.nextInt(numOfFunctionsInSelectList + 1);
for (int i = 0; i < aggregationFunctionCount; i++) {
int aggregationFunctionIndex = RANDOM.nextInt(numOfFunctionsInSelectList);
String aggregationFunction = arrayOfAggregationColumnsAndFunctions.get(aggregationFunctionIndex);
havingClauseAggregationFunctions.add(aggregationFunction);
havingClauseOperatorsAndValues.add(createOperatorAndValueForAggregationFunction(aggregationFunction));
}
int aggregationFunctionNotInSelectListCount = RANDOM.nextInt(MAX_NUM_AGGREGATION_COLUMNS + 1);
for (int i = 0; i < aggregationFunctionNotInSelectListCount; i++) {
String aggregationFunction = createRandomAggregationFunction();
havingClauseAggregationFunctions.add(aggregationFunction);
havingClauseOperatorsAndValues.add(createOperatorAndValueForAggregationFunction(aggregationFunction));
}
aggregationFunctionCount += aggregationFunctionNotInSelectListCount;
for (int i = 1; i < aggregationFunctionCount; i++) {
havingClauseBooleanOperators.add(pickRandom(BOOLEAN_OPERATORS));
}
}
private String createRandomAggregationFunction() {
String aggregationFunction = pickRandom(_useMultistageEngine ? MULTI_STAGE_AGGREGATION_FUNCTIONS
: AGGREGATION_FUNCTIONS);
String aggregationColumn;
switch (aggregationFunction) {
// "COUNT" and "DISTINCTCOUNT" support all single-value columns.
case "COUNT":
case "DISTINCTCOUNT":
aggregationColumn = pickRandom(_singleValueColumnNames);
break;
// Other functions only support single-value numeric columns.
default:
aggregationColumn = pickRandom(_singleValueNumericalColumnNames);
break;
}
return aggregationFunction + "(" + aggregationColumn + ")";
}
private String createOperatorAndValueForAggregationFunction(String aggregationFunction) {
boolean generateInt = aggregationFunction.startsWith("COUNT") || aggregationFunction.startsWith("DISTINCTCOUNT");
boolean comparisonOperatorNotBetweenOrIN = RANDOM.nextBoolean();
if (comparisonOperatorNotBetweenOrIN) {
return pickRandom(COMPARISON_OPERATORS) + " " + generateRandomValue(generateInt);
} else {
boolean isItBetween = RANDOM.nextBoolean();
if (isItBetween) {
return "BETWEEN " + generateRandomValue(generateInt) + " AND " + generateRandomValue(generateInt);
} else {
int numValues = RANDOM.nextInt(MAX_NUM_IN_CLAUSE_VALUES) + 1;
Set<String> values = new HashSet<>();
while (values.size() < numValues) {
values.add(generateRandomValue(generateInt));
}
String valuesString = StringUtils.join(values, ", ");
boolean isItIn = RANDOM.nextBoolean();
return (isItIn ? "" : "NOT ") + "IN (" + valuesString + ")";
}
}
}
private String generateRandomValue(boolean generateInt) {
return generateInt ? Integer.toString(RANDOM.nextInt(MAX_INT_FOR_HAVING_CLAUSE_PREDICATE) + 1)
: Float.toString(RANDOM.nextFloat() * MAX_FLOAT_FOR_HAVING_CLAUSE_PREDICATE);
}
}
/**
* Generator for single-value column comparison predicate query fragment.
*/
private class SingleValueComparisonPredicateGenerator implements PredicateGenerator {
@Override
public QueryFragment generatePredicate(String columnName) {
String columnValue = pickRandom(_columnToValueList.get(columnName));
String comparisonOperator = pickRandom(COMPARISON_OPERATORS);
return new StringQueryFragment(joinWithSpaces(columnName, comparisonOperator, columnValue));
}
}
/**
* Generator for single-value column <code>IN</code> predicate query fragment.
*/
private class SingleValueInPredicateGenerator implements PredicateGenerator {
@Override
public QueryFragment generatePredicate(String columnName) {
List<String> columnValues = _columnToValueList.get(columnName);
int numValues = Math.min(RANDOM.nextInt(MAX_NUM_IN_CLAUSE_VALUES) + 1, columnValues.size());
Set<String> values = new HashSet<>();
while (values.size() < numValues) {
values.add(pickRandom(columnValues));
}
String inValues = StringUtils.join(values, ", ");
boolean notIn = RANDOM.nextBoolean();
if (notIn) {
return new StringQueryFragment(columnName + " NOT IN (" + inValues + ")");
} else {
return new StringQueryFragment(columnName + " IN (" + inValues + ")");
}
}
}
/**
* Generator for single-value column <code>BETWEEN</code> predicate query fragment.
*/
private class SingleValueBetweenPredicateGenerator implements PredicateGenerator {
@Override
public QueryFragment generatePredicate(String columnName) {
List<String> columnValues = _columnToValueList.get(columnName);
String leftValue = pickRandom(columnValues);
String rightValue = pickRandom(columnValues);
return new StringQueryFragment(columnName + " BETWEEN " + leftValue + " AND " + rightValue);
}
}
/**
* Generator for single-value column <code>REGEX</code> predicate query fragment.
*/
private class SingleValueRegexPredicateGenerator implements PredicateGenerator {
Random _random = new Random();
@Override
public QueryFragment generatePredicate(String columnName) {
List<String> columnValues = _columnToValueList.get(columnName);
String value = pickRandom(columnValues);
// do regex only for string type
if (value.startsWith("'") && value.endsWith("'")) {
// replace only one character for now with .* ignore the first and last character
int indexToReplaceWithRegex = 1 + _random.nextInt(value.length() - 2);
String regex = value.substring(1, indexToReplaceWithRegex) + ".*" + value.substring(indexToReplaceWithRegex + 1,
value.length() - 1);
return new StringQueryFragment(String.format(" REGEXP_LIKE(%s, '%s')", columnName, regex),
String.format(" REGEXP_LIKE(%s, '%s', 'i')", columnName, regex));
} else {
String equalsPredicate = String.format("%s = %s", columnName, value);
return new StringQueryFragment(equalsPredicate);
}
}
}
/**
* Generator for multi-value column comparison predicate query fragment.
* <p>DO NOT SUPPORT '<code>NOT EQUAL</code>'.
*/
private class MultiValueComparisonPredicateGenerator implements PredicateGenerator {
@Override
public QueryFragment generatePredicate(String columnName) {
String columnValue = pickRandom(_columnToValueList.get(columnName));
String comparisonOperator = pickRandom(COMPARISON_OPERATORS);
// Not equal works differently on multi-value, so avoid '<>' comparison.
while (comparisonOperator.equals("<>")) {
comparisonOperator = pickRandom(COMPARISON_OPERATORS);
}
List<String> h2ComparisonClauses =
new ArrayList<>(ClusterIntegrationTestUtils.MAX_NUM_ELEMENTS_IN_MULTI_VALUE_TO_COMPARE);
for (int i = 0; i < ClusterIntegrationTestUtils.MAX_NUM_ELEMENTS_IN_MULTI_VALUE_TO_COMPARE; i++) {
h2ComparisonClauses.add(joinWithSpaces(columnName + "__MV" + i, comparisonOperator, columnValue));
}
return new StringQueryFragment(joinWithSpaces(columnName, comparisonOperator, columnValue),
"(" + StringUtils.join(h2ComparisonClauses, " OR ") + ")");
}
}
/**
* Generator for multi-value column <code>IN</code> predicate query fragment.
* <p>DO NOT SUPPORT '<code>NOT IN</code>'.
*/
private class MultiValueInPredicateGenerator implements PredicateGenerator {
@Override
public QueryFragment generatePredicate(String columnName) {
List<String> columnValues = _columnToValueList.get(columnName);
int numValues = Math.min(RANDOM.nextInt(MAX_NUM_IN_CLAUSE_VALUES) + 1, columnValues.size());
Set<String> values = new HashSet<>();
while (values.size() < numValues) {
values.add(pickRandom(columnValues));
}
String inValues = StringUtils.join(values, ", ");
List<String> h2InClauses =
new ArrayList<>(ClusterIntegrationTestUtils.MAX_NUM_ELEMENTS_IN_MULTI_VALUE_TO_COMPARE);
for (int i = 0; i < ClusterIntegrationTestUtils.MAX_NUM_ELEMENTS_IN_MULTI_VALUE_TO_COMPARE; i++) {
h2InClauses.add(columnName + "__MV" + i + " IN (" + inValues + ")");
}
return new StringQueryFragment(columnName + " IN (" + inValues + ")",
"(" + StringUtils.join(h2InClauses, " OR ") + ")");
}
}
/**
* Generator for multi-value column <code>BETWEEN</code> predicate query fragment.
*/
private class MultiValueBetweenPredicateGenerator implements PredicateGenerator {
@Override
public QueryFragment generatePredicate(String columnName) {
List<String> columnValues = _columnToValueList.get(columnName);
String leftValue = pickRandom(columnValues);
String rightValue = pickRandom(columnValues);
List<String> h2ComparisonClauses =
new ArrayList<>(ClusterIntegrationTestUtils.MAX_NUM_ELEMENTS_IN_MULTI_VALUE_TO_COMPARE);
for (int i = 0; i < ClusterIntegrationTestUtils.MAX_NUM_ELEMENTS_IN_MULTI_VALUE_TO_COMPARE; i++) {
h2ComparisonClauses.add(columnName + "__MV" + i + " BETWEEN " + leftValue + " AND " + rightValue);
}
return new StringQueryFragment(columnName + " BETWEEN " + leftValue + " AND " + rightValue,
"(" + StringUtils.join(h2ComparisonClauses, " OR ") + ")");
}
}
}