blob: 978e565c6148769ed08ea08c00b87ffb57df9bef [file] [log] [blame]
/*******************************************************************************
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package org.apache.drill;
import com.google.common.base.Joiner;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.expression.parser.ExprLexer;
import org.apache.drill.common.expression.parser.ExprParser;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.common.util.TestTools;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.proto.UserBitShared;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import static org.junit.Assert.assertEquals;
public class TestBuilder {
// test query to run
private String query;
// the type of query for the test
private UserBitShared.QueryType queryType;
// should the validation enforce ordering
private Boolean ordered;
private boolean approximateEquality;
private BufferAllocator allocator;
// Maps column names to their expected types, so that type information does not have to come from the
// ordering of the columns in the CSV file or from the default type inference applied when reading JSON.
// This is used for the case where results of the test query are adding type casts to the baseline queries;
// it saves a little bit of setup in cases where strict type enforcement is not necessary for a given test.
protected Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap;
// queries to run before the baseline or test queries, can be used to set options
private String baselineOptionSettingQueries;
private String testOptionSettingQueries;
// Two different methods are available for comparing ordered results. The default reads all of the records
// into giant lists of objects, like one giant on-heap batch of 'vectors'.
// This flag enables the other approach, which iterates through a hyper batch for the test query results and the
// baseline. While this does work faster and use less memory, it can be harder to debug as the elements are not
// all in a single list.
private boolean highPerformanceComparison;
// for cases where the result set is just a single record, test writers can avoid creating a lot of small baseline
// files by providing a list of baseline values
private Object[] baselineValues;
// column names for use with the baseline values
protected String[] baselineColumns;
// In cases where we need to verify larger datasets without the risk of running the baseline data through
// the drill engine, results can be provided in a list of maps. While this model does make a lot of sense, there is a
// lot of work to make the type handling/casting work correctly, and making robust complex type handling work completely outside
// of the drill engine for generating baselines would likely be more work than it would be worth. For now we will be
// going with an approach of using this facility to validate the parts of the drill engine that could break in ways
// that would affect the reading of baseline files (i.e. we need robust test for storage engines, project and casting that
// use this interface) and then rely on the engine for the rest of the tests that will use the baseline queries.
private List<Map> baselineRecords;
/**
 * Creates a builder bound to the given allocator, with every other setting initialised by {@link #reset()}.
 *
 * @param allocator allocator that {@link #build()} hands to the created {@code DrillTestWrapper}
 */
public TestBuilder(BufferAllocator allocator) {
this.allocator = allocator;
// reset() is overridable: a subclass override runs here, before that subclass's own constructor body
reset();
}
public TestBuilder(BufferAllocator allocator, String query, UserBitShared.QueryType queryType, Boolean ordered,
boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap,
String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) {
this(allocator);
if (ordered == null) {
throw new RuntimeException("Ordering not set, when using a baseline file or query you must explicitly call the ordered() or unOrdered() method on the " + this.getClass().getSimpleName());
}
this.query = query;
this.queryType = queryType;
this.ordered = ordered;
this.approximateEquality = approximateEquality;
this.baselineTypeMap = baselineTypeMap;
this.baselineOptionSettingQueries = baselineOptionSettingQueries;
this.testOptionSettingQueries = testOptionSettingQueries;
this.highPerformanceComparison = highPerformanceComparison;
}
/**
 * Restores the builder defaults: empty test query and option-setting queries, both comparison flags off,
 * ordering unset and no explicit baseline records.
 *
 * The query type, baseline type map and baseline columns are not touched by this method.
 *
 * @return this builder
 */
protected TestBuilder reset() {
  // query text and option-setting queries default to empty strings
  this.query = "";
  this.testOptionSettingQueries = "";
  this.baselineOptionSettingQueries = "";

  // comparison flags default to off
  this.approximateEquality = false;
  this.highPerformanceComparison = false;

  // ordering must be chosen explicitly, and no explicit baseline is present
  this.ordered = null;
  this.baselineRecords = null;
  return this;
}
/**
 * Creates the {@code DrillTestWrapper} described by the current builder settings.
 *
 * @return a wrapper configured with the test query, baseline information and comparison flags
 * @throws RuntimeException if neither {@link #ordered()} nor {@link #unOrdered()} has been called
 * @throws Exception if high performance comparison was requested for an unordered check, or if the
 *         validation query type cannot be determined
 */
public DrillTestWrapper build() throws Exception {
  // ordered is a Boolean that reset() leaves null; fail with an explanation instead of an unboxing NPE
  if (ordered == null) {
    throw new RuntimeException("Ordering not set, before building the test you must explicitly call the ordered() or unOrdered() method on the " + this.getClass().getSimpleName());
  }
  if (!ordered && highPerformanceComparison) {
    throw new Exception("High performance comparison only available for ordered checks, to enforce this restriction, ordered() must be called first.");
  }
  return new DrillTestWrapper(this, allocator, query, queryType, baselineOptionSettingQueries, testOptionSettingQueries,
      getValidationQueryType(), ordered, approximateEquality, highPerformanceComparison, baselineRecords);
}
/**
 * Builds the test wrapper with {@link #build()} and immediately calls {@code run()} on it.
 *
 * @throws Exception anything thrown while building or running the wrapper
 */
public void go() throws Exception {
  DrillTestWrapper testWrapper = build();
  testWrapper.run();
}
/**
 * Sets the test query to the given SQL text, after passing it through
 * {@code BaseTestQuery.normalizeQuery}, and marks the query type as SQL.
 *
 * @param query SQL text of the test query
 * @return this builder
 */
public TestBuilder sqlQuery(String query) {
  String normalizedQuery = BaseTestQuery.normalizeQuery(query);
  this.query = normalizedQuery;
  this.queryType = UserBitShared.QueryType.SQL;
  return this;
}
/**
 * Sets the test query from a {@link String#format(String, Object...)} template.
 *
 * @param query format string for the SQL text
 * @param replacements arguments substituted into the format string
 * @return this builder
 */
public TestBuilder sqlQuery(String query, Object... replacements) {
  String formattedQuery = String.format(query, replacements);
  return sqlQuery(formattedQuery);
}
/**
 * Sets the test query to the content returned by {@code BaseTestQuery.getFile} for the given file and
 * marks the query type as SQL. The content is stored as returned.
 *
 * @param queryFile file containing the SQL text
 * @return this builder
 * @throws IOException if the file cannot be read
 */
public TestBuilder sqlQueryFromFile(String queryFile) throws IOException {
  // read first so that a failed read leaves the builder unchanged
  String fileContents = BaseTestQuery.getFile(queryFile);
  this.queryType = UserBitShared.QueryType.SQL;
  this.query = fileContents;
  return this;
}
/**
 * Sets the test query to the content returned by {@code BaseTestQuery.getFile} for the given file and
 * marks the query type as PHYSICAL.
 *
 * @param queryFile file containing the physical plan
 * @return this builder
 * @throws IOException if the file cannot be read
 */
public TestBuilder physicalPlanFromFile(String queryFile) throws IOException {
  // read first so that a failed read leaves the builder unchanged
  String planContents = BaseTestQuery.getFile(queryFile);
  this.queryType = UserBitShared.QueryType.PHYSICAL;
  this.query = planContents;
  return this;
}
/**
 * Requests that validation enforce the ordering of the results.
 *
 * @return this builder
 */
public TestBuilder ordered() {
  ordered = Boolean.TRUE;
  return this;
}
/**
 * Requests that validation not enforce the ordering of the results.
 *
 * @return this builder
 */
public TestBuilder unOrdered() {
  ordered = Boolean.FALSE;
  return this;
}
/**
 * Switches the comparison to the high performance mode. This mode can only be used with ordered
 * verifications ({@link #build()} rejects it otherwise). It runs faster and uses less memory, but may be
 * a little harder to debug as it iterates over a hyper batch rather than reading all of the values into
 * large on-heap lists.
 *
 * @return this builder
 * @throws Exception declared for callers; nothing is thrown by this method itself
 */
public TestBuilder highPerformanceComparison() throws Exception {
  highPerformanceComparison = true;
  return this;
}
/**
 * Sets the queries to run before the baseline query; can be used to set several options.
 *
 * @param queries the queries, as a semi-colon separated list
 * @return this builder
 */
public TestBuilder optionSettingQueriesForBaseline(String queries) {
this.baselineOptionSettingQueries = queries;
return this;
}
/**
 * Sets the queries to run before the test query; can be used to set several options.
 *
 * @param queries the queries, as a semi-colon separated list
 * @return this builder
 */
public TestBuilder optionSettingQueriesForTestQuery(String queries) {
this.testOptionSettingQueries = queries;
return this;
}
/**
 * Turns on the approximate equality flag that {@link #build()} passes to the test wrapper.
 *
 * @return this builder
 */
public TestBuilder approximateEquality() {
  approximateEquality = true;
  return this;
}
/**
 * Parses a string into a {@link SchemaPath} by running it through the expression lexer and parser.
 * Modified code from the SchemaPath.De class; this should be used sparingly and only in tests if
 * absolutely needed.
 *
 * @param path textual form of the schema path
 * @return the parsed schema path
 * @throws IllegalStateException if the parsed expression is not a {@link SchemaPath}
 * @throws RuntimeException wrapping the {@link RecognitionException} if parsing fails
 */
public static SchemaPath parsePath(String path) {
  ExprParser.parse_return parsed;
  try {
    ExprLexer lexer = new ExprLexer(new ANTLRStringStream(path));
    ExprParser parser = new ExprParser(new CommonTokenStream(lexer));
    parsed = parser.parse();
  } catch (RecognitionException e) {
    throw new RuntimeException(e);
  }
  // only expressions that parse to a plain schema path are accepted
  if (!(parsed.e instanceof SchemaPath)) {
    throw new IllegalStateException("Schema path is not a valid format.");
  }
  return (SchemaPath) parsed.e;
}
/**
 * Returns the query used to produce the baseline. The plain builder has no baseline query, so this
 * implementation always fails; the baseline-specific builders in this file override it.
 *
 * @throws RuntimeException always
 */
String getValidationQuery() throws Exception {
  final String message = "Must provide some kind of baseline, either a baseline file or another query";
  throw new RuntimeException(message);
}
/**
 * Returns the type of the baseline query. For the plain builder there is no baseline query: when explicit
 * baseline records have been supplied the result is {@code null}, otherwise no baseline exists at all.
 *
 * @return {@code null} when explicit baseline records are set
 * @throws RuntimeException if no explicit baseline records are set
 */
protected UserBitShared.QueryType getValidationQueryType() throws Exception {
  if (!singleExplicitBaselineRecord()) {
    throw new RuntimeException("Must provide some kind of baseline, either a baseline file or another query");
  }
  // explicit records are compared directly, so there is no baseline query type
  return null;
}
/**
 * Creates a builder that validates against a JSON baseline file, carrying over the query, ordering,
 * comparison flags, baseline type map and option-setting queries configured so far. Baseline columns and
 * explicit baseline records are not carried over.
 *
 * @param filePath path of the JSON baseline file
 * @return a new JSON baseline builder
 * @throws RuntimeException if ordering has not been set on this builder
 */
public JSONTestBuilder jsonBaselineFile(String filePath) {
  JSONTestBuilder jsonBuilder = new JSONTestBuilder(filePath, allocator, query, queryType, ordered,
      approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries,
      highPerformanceComparison);
  return jsonBuilder;
}
/**
 * Creates a builder that validates against a CSV baseline file, carrying over the query, ordering,
 * comparison flags, baseline type map and option-setting queries configured so far. Baseline columns and
 * explicit baseline records are not carried over.
 *
 * @param filePath path of the CSV baseline file
 * @return a new CSV baseline builder
 * @throws RuntimeException if ordering has not been set on this builder
 */
public CSVTestBuilder csvBaselineFile(String filePath) {
  CSVTestBuilder csvBuilder = new CSVTestBuilder(filePath, allocator, query, queryType, ordered,
      approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries,
      highPerformanceComparison);
  return csvBuilder;
}
/**
 * Sets the mapping from column path to expected type for the baseline. The map is stored as given,
 * without copying.
 *
 * @param baselineTypeMap column-to-type mapping
 * @return this builder
 */
public TestBuilder baselineTypes(Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap) {
this.baselineTypeMap = baselineTypeMap;
return this;
}
/**
 * Reports whether a baseline type map has been provided.
 *
 * @return {@code true} if the baseline type map is non-null
 */
boolean typeInfoSet() {
  return baselineTypeMap != null;
}
/**
 * Indicates that the test query should be checked for an empty result set. Switches the builder to
 * unordered validation and installs an empty list of baseline records.
 *
 * @return this builder
 */
public TestBuilder expectsEmptyResultSet() {
  unOrdered();
  this.baselineRecords = new ArrayList<Map>();
  return this;
}
/**
 * Adds one baseline record from a simple list of values, without the need to create a CSV or JSON file
 * to store the baseline. The values are paired, by position, with the names given to
 * {@link #baselineColumns(String...)}.
 *
 * This can be called repeatedly to pass a list of records to verify. It works for both ordered and
 * unordered checks.
 *
 * @param baselineValues the values of one record, one per baseline column; a bare {@code null} argument
 *        is treated as a single null value
 * @return this builder
 * @throws RuntimeException if ordering or the baseline columns have not been set yet
 * @throws AssertionError if the number of values differs from the number of baseline columns
 */
public TestBuilder baselineValues(Object... baselineValues) {
  if (ordered == null) {
    throw new RuntimeException("Ordering not set, before specifying baseline data you must explicitly call the ordered() or unOrdered() method on the " + this.getClass().getSimpleName());
  }
  if (baselineColumns == null) {
    throw new RuntimeException("Baseline columns not set, before specifying baseline data you must call the baselineColumns() method on the " + this.getClass().getSimpleName());
  }
  if (baselineValues == null) {
    // baselineValues(null) is compiled as a null array, not as an array holding a single null value
    baselineValues = new Object[] {null};
  }
  // validate before touching baselineRecords, so a failed call does not leave an empty baseline behind
  assertEquals("Must supply the same number of baseline values as columns.", baselineColumns.length, baselineValues.length);

  if (baselineRecords == null) {
    baselineRecords = new ArrayList<Map>();
  }
  Map<String, Object> record = new HashMap<String, Object>();
  for (int i = 0; i < baselineColumns.length; i++) {
    record.put(baselineColumns[i], baselineValues[i]);
  }
  baselineRecords.add(record);
  return this;
}
/**
 * Supplies the complete baseline as already materialized records.
 *
 * This can be used in cases where we want to avoid issues with the assumptions made by the test framework.
 * Most of the methods for verification in the framework run drill queries to read baseline files or
 * execute alternative baseline queries. This model relies on basic functionality of reading files with storage
 * plugins and applying casts/projects to be stable.
 *
 * This method can be used to verify the engine for these cases and any other future execution paths that would
 * be used by both the test query and baseline. Without tests like this it is possible that some tests
 * could falsely report as passing, as both the test query and baseline query could run into the same problem
 * with an assumed stable code path and produce the same erroneous result.
 *
 * The list is stored as given (not copied) and replaces any baseline records set earlier.
 *
 * @param materializedRecords - a list of maps representing materialized results
 * @return this builder
 */
public TestBuilder baselineRecords(List<Map> materializedRecords) {
this.baselineRecords = materializedRecords;
return this;
}
/**
 * Sets the baseline column names. Each name is parsed with {@link #parsePath(String)} and stored in the
 * form returned by {@code toExpr()}; the array passed in by the caller is not modified.
 *
 * This setting has a slightly different impact on the test depending on how some of the other
 * configuration options are set.
 *
 * If a JSON baseline file is given, this list will act as a project list to verify the
 * test query against a subset of the columns in the file.
 *
 * For a CSV baseline file, these will act as aliases for columns [0 .. n] in the repeated
 * varchar column that is read out of CSV.
 *
 * For a baseline sql query, this currently has no effect.
 *
 * For explicit baseline values given in java code with the baselineValues() method, these will
 * be used as the keys of the map created for each record.
 *
 * @param columns the column names
 * @return this builder
 */
public TestBuilder baselineColumns(String... columns) {
  // normalize into a fresh array rather than overwriting the elements of the caller's array
  String[] normalizedColumns = new String[columns.length];
  for (int i = 0; i < columns.length; i++) {
    normalizedColumns[i] = parsePath(columns[i]).toExpr();
  }
  this.baselineColumns = normalizedColumns;
  return this;
}
// Reports whether explicit baseline records have been supplied. Despite the name, the check is only that
// the record list is non-null, so it is also true for the empty list set by expectsEmptyResultSet() and
// for lists holding several records.
private boolean singleExplicitBaselineRecord() {
return baselineRecords != null;
}
/**
 * Provides a SQL query to validate against. The settings configured so far (query, ordering, comparison
 * flags, baseline type map and option-setting queries) are carried over to the returned builder.
 *
 * @param baselineQuery SQL text of the baseline query
 * @return a new baseline query builder
 * @throws RuntimeException if ordering has not been set on this builder
 */
public BaselineQueryTestBuilder sqlBaselineQuery(String baselineQuery) {
  final UserBitShared.QueryType baselineType = UserBitShared.QueryType.SQL;
  return new BaselineQueryTestBuilder(baselineQuery, baselineType, allocator, query, queryType, ordered,
      approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries,
      highPerformanceComparison);
}
/**
 * Provides a path to a file containing a SQL query to use as a baseline. The file content is obtained
 * with {@code BaseTestQuery.getFile}; the settings configured so far are carried over to the returned
 * builder.
 *
 * @param baselineQueryFilename file containing the SQL baseline query
 * @return a new baseline query builder
 * @throws IOException if the file cannot be read
 * @throws RuntimeException if ordering has not been set on this builder
 */
public BaselineQueryTestBuilder sqlBaselineQueryFromFile(String baselineQueryFilename) throws IOException {
  final String baselineSql = BaseTestQuery.getFile(baselineQueryFilename);
  final UserBitShared.QueryType baselineType = UserBitShared.QueryType.SQL;
  return new BaselineQueryTestBuilder(baselineSql, baselineType, allocator, query, queryType, ordered,
      approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries,
      highPerformanceComparison);
}
/**
 * Provides a path to a file containing a physical plan to use as a baseline. As physical plans are
 * verbose, this is the only option provided for specifying them; physical plans, or any large JSON
 * strings, should not live in the Java source as literals.
 *
 * @param baselinePhysicalPlanPath file containing the baseline physical plan
 * @return a new baseline query builder
 * @throws IOException if the file cannot be read
 * @throws RuntimeException if ordering has not been set on this builder
 */
public BaselineQueryTestBuilder physicalPlanBaselineQueryFromFile(String baselinePhysicalPlanPath) throws IOException {
  final String baselinePlan = BaseTestQuery.getFile(baselinePhysicalPlanPath);
  final UserBitShared.QueryType baselineType = UserBitShared.QueryType.PHYSICAL;
  return new BaselineQueryTestBuilder(baselinePlan, baselineType, allocator, query, queryType, ordered,
      approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries,
      highPerformanceComparison);
}
// Builds the "(precision,scale)" suffix for a decimal type; every other minor type yields an empty string.
private String getDecimalPrecisionScaleInfo(TypeProtos.MajorType type) {
  switch (type.getMinorType()) {
    case DECIMAL9:
    case DECIMAL18:
    case DECIMAL28DENSE:
    case DECIMAL28SPARSE:
    case DECIMAL38DENSE:
    case DECIMAL38SPARSE:
      return String.format("(%d,%d)", type.getPrecision(), type.getScale());
    default:
      // non-decimal types carry no precision/scale suffix
      return "";
  }
}
public class CSVTestBuilder extends TestBuilder {
// path to the baseline file that will be inserted into the validation query
private String baselineFilePath;
// use to cast the baseline file columns, if not set the types
// that come out of the test query drive interpretation of baseline
private TypeProtos.MajorType[] baselineTypes;
CSVTestBuilder(String baselineFile, BufferAllocator allocator, String query, UserBitShared.QueryType queryType, Boolean ordered,
boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap,
String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) {
super(allocator, query, queryType, ordered, approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries,
highPerformanceComparison);
this.baselineFilePath = baselineFile;
}
public CSVTestBuilder baselineTypes(TypeProtos.MajorType... baselineTypes) {
this.baselineTypes = baselineTypes;
this.baselineTypeMap = null;
return this;
}
// convenience method to convert minor types to major types if no decimals with precisions are needed
public CSVTestBuilder baselineTypes(TypeProtos.MinorType ... baselineTypes) {
TypeProtos.MajorType[] majorTypes = new TypeProtos.MajorType[baselineTypes.length];
int i = 0;
for(TypeProtos.MinorType minorType : baselineTypes) {
majorTypes[i] = Types.required(minorType);
i++;
}
this.baselineTypes = majorTypes;
this.baselineTypeMap = null;
return this;
}
protected TestBuilder reset() {
super.reset();
baselineTypeMap = null;
baselineTypes = null;
baselineFilePath = null;
return this;
}
boolean typeInfoSet() {
if (super.typeInfoSet() || baselineTypes != null) {
return true;
} else {
return false;
}
}
String getValidationQuery() throws Exception {
if (baselineColumns.length == 0) {
throw new Exception("Baseline CSV files require passing column names, please call the baselineColumns() method on the test builder.");
}
if (baselineTypes != null) {
assertEquals("Must pass the same number of types as column names if types are provided.", baselineTypes.length, baselineColumns.length);
}
String[] aliasedExpectedColumns = new String[baselineColumns.length];
for (int i = 0; i < baselineColumns.length; i++) {
aliasedExpectedColumns[i] = "columns[" + i + "] ";
TypeProtos.MajorType majorType;
if (baselineTypes != null) {
majorType = baselineTypes[i];
} else if (baselineTypeMap != null) {
majorType = baselineTypeMap.get(parsePath(baselineColumns[i]));
} else {
throw new Exception("Type information not set for interpreting csv baseline file.");
}
String precision = getDecimalPrecisionScaleInfo(majorType);
// TODO - determine if there is a better behavior here, if we do not specify a length the default behavior is
// to cast to varchar with length 1
// set default cast size for varchar, the cast function will take the lesser of this passed value and the
// length of the incoming data when choosing the length for the outgoing data
if (majorType.getMinorType() == TypeProtos.MinorType.VARCHAR ||
majorType.getMinorType() == TypeProtos.MinorType.VARBINARY) {
precision = "(65000)";
}
aliasedExpectedColumns[i] = "cast(" + aliasedExpectedColumns[i] + " as " +
Types.getNameOfMinorType(majorType.getMinorType()) + precision + " ) " + baselineColumns[i];
}
String query = "select " + Joiner.on(", ").join(aliasedExpectedColumns) + " from cp.`" + baselineFilePath + "`";
return query;
}
protected UserBitShared.QueryType getValidationQueryType() throws Exception {
return UserBitShared.QueryType.SQL;
}
}
/**
 * Builder for tests whose baseline is a JSON file. The validation query selects the baseline columns
 * (initially {@code *}) from that file.
 */
public class JSONTestBuilder extends TestBuilder {
  // path to the baseline file that will be inserted into the validation query
  private String baselineFilePath;

  JSONTestBuilder(String baselineFile, BufferAllocator allocator, String query, UserBitShared.QueryType queryType, Boolean ordered,
      boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap,
      String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) {
    super(allocator, query, queryType, ordered, approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries,
        highPerformanceComparison);
    // project everything until the caller narrows the list with baselineColumns()
    this.baselineColumns = new String[] {"*"};
    this.baselineFilePath = baselineFile;
  }

  /**
   * @return a SQL query selecting the baseline columns from the baseline file
   */
  String getValidationQuery() {
    String projectList = Joiner.on(", ").join(baselineColumns);
    return "select " + projectList + " from cp.`" + baselineFilePath + "`";
  }

  /**
   * @return always {@code QueryType.SQL}, as the baseline file is read with a SQL query
   */
  protected UserBitShared.QueryType getValidationQueryType() throws Exception {
    final UserBitShared.QueryType sql = UserBitShared.QueryType.SQL;
    return sql;
  }
}
/**
 * Builder for tests whose baseline is produced by another query, given together with its query type.
 */
public class BaselineQueryTestBuilder extends TestBuilder {
  // text of the baseline query, returned unchanged as the validation query
  private String baselineQuery;
  // type of the baseline query
  private UserBitShared.QueryType baselineQueryType;

  BaselineQueryTestBuilder(String baselineQuery, UserBitShared.QueryType baselineQueryType, BufferAllocator allocator,
      String query, UserBitShared.QueryType queryType, Boolean ordered,
      boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap,
      String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) {
    super(allocator, query, queryType, ordered, approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries,
        highPerformanceComparison);
    this.baselineQueryType = baselineQueryType;
    this.baselineQuery = baselineQuery;
  }

  /**
   * @return the baseline query exactly as supplied
   */
  String getValidationQuery() {
    return this.baselineQuery;
  }

  /**
   * @return the query type supplied together with the baseline query
   */
  protected UserBitShared.QueryType getValidationQueryType() throws Exception {
    return this.baselineQueryType;
  }

  /**
   * Always reports type information as set. This currently assumes that all explicit baseline queries
   * will have fully qualified type information; if this changes, the baseline query can be run in a sub
   * query with the implicit or explicit type passing added on top of it, as is currently done when
   * reading a baseline file.
   *
   * @return always {@code true}
   */
  boolean typeInfoSet() {
    final boolean baselineQueryCarriesTypes = true;
    return baselineQueryCarriesTypes;
  }
}
}