// blob: 94f9c7d72c4add8b6de949f0d1e022aa73a22055
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lens.cube.parse;
import java.util.*;
import org.apache.lens.cube.error.ConflictingFields;
import org.apache.lens.cube.error.FieldsCannotBeQueriedTogetherException;
import org.apache.lens.cube.metadata.CubeInterface;
import org.apache.lens.cube.metadata.DerivedCube;
import org.apache.lens.cube.metadata.ReferencedDimAttribute;
import org.apache.lens.cube.metadata.ReferencedDimAttribute.ChainRefCol;
import org.apache.lens.cube.parse.ExpressionResolver.ExprSpecContext;
import org.apache.lens.server.api.error.LensException;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import com.google.common.collect.ImmutableSet;
import lombok.extern.slf4j.Slf4j;
/**
* Validate fields based on cube queryability
*/
@Slf4j
public class FieldValidator implements ContextRewriter {
@Override
public void rewriteContext(CubeQueryContext cubeql) throws LensException {
validateFields(cubeql);
}
public void validateFields(CubeQueryContext cubeql) throws LensException {
CubeInterface cube = cubeql.getCube();
if (cube == null) {
return;
}
if (!cube.allFieldsQueriable()) {
// do queried field validation
List<DerivedCube> dcubes;
dcubes = cubeql.getMetastoreClient().getAllDerivedQueryableCubes(cube);
ImmutableSet<String> queriedTimeDimCols = cubeql.getQueriedTimeDimCols();
Set<String> queriedDimAttrs = new LinkedHashSet<String>(queriedTimeDimCols);
Set<String> nonQueryableFields = new LinkedHashSet<String>(queriedTimeDimCols);
Set<String> queriedMsrs = new LinkedHashSet<String>(cubeql.getQueriedMsrs());
queriedMsrs.addAll(getMeasuresFromExprMeasures(cubeql));
Set<String> chainedSrcColumns = new HashSet<String>();
// dim attributes and chained source columns should only come from WHERE and GROUP BY ASTs
findDimAttrsAndChainSourceColumns(cubeql, cubeql.getGroupByAST(), queriedDimAttrs, chainedSrcColumns,
nonQueryableFields);
findDimAttrsAndChainSourceColumns(cubeql, cubeql.getWhereAST(), queriedDimAttrs,
chainedSrcColumns, nonQueryableFields);
// do validation
// Find atleast one derived cube which contains all the dimensions
// queried.
log.info("Chained columns: {}, Queried dim attributes: {}", chainedSrcColumns, queriedDimAttrs);
boolean derivedCubeFound = false;
for (DerivedCube dcube : dcubes) {
if (dcube.getDimAttributeNames().containsAll(chainedSrcColumns)
&& dcube.getDimAttributeNames().containsAll(queriedDimAttrs)) {
log.info("Derived cube found: {}", dcube.getName());
// remove all the measures that are covered
queriedMsrs.removeAll(dcube.getMeasureNames());
derivedCubeFound = true;
}
}
final SortedSet<String> conflictingFields = new TreeSet<String>();
if (!derivedCubeFound && !nonQueryableFields.isEmpty()) {
conflictingFields.addAll(nonQueryableFields);
throw new FieldsCannotBeQueriedTogetherException(new ConflictingFields(conflictingFields));
}
if (!queriedMsrs.isEmpty()) {
log.info("Uncovered queried Measures: {}", queriedMsrs);
// Add appropriate message to know which fields are not queryable together
if (!nonQueryableFields.isEmpty()) {
log.info("Non queryable fields: {}", nonQueryableFields);
conflictingFields.addAll(nonQueryableFields);
conflictingFields.addAll(queriedMsrs);
throw new FieldsCannotBeQueriedTogetherException(new ConflictingFields(conflictingFields));
} else {
conflictingFields.addAll(queriedMsrs);
throw new FieldsCannotBeQueriedTogetherException(new ConflictingFields(conflictingFields));
}
}
}
}
private Set<String> getMeasuresFromExprMeasures(CubeQueryContext cubeql) {
Set<String> exprMeasures = new HashSet<String>();
String cubeAlias = cubeql.getAliasForTableName(cubeql.getCube().getName());
for (String expr : cubeql.getQueriedExprsWithMeasures()) {
for (ExprSpecContext esc : cubeql.getExprCtx().getExpressionContext(expr, cubeAlias).getAllExprs()) {
if (esc.getTblAliasToColumns().get(cubeAlias) != null) {
for (String cubeCol : esc.getTblAliasToColumns().get(cubeAlias)) {
if (cubeql.getCube().getMeasureByName(cubeCol) != null) {
exprMeasures.add(cubeCol);
}
}
}
}
}
return exprMeasures;
}
// Traverse parse tree to figure out dimension attributes of the cubes and join chains
// present in the AST.
private void findDimAttrsAndChainSourceColumns(final CubeQueryContext cubeql,
final ASTNode tree,
final Set<String> dimAttributes,
final Set<String> chainSourceColumns,
final Set<String> nonQueryableColumns) throws LensException {
if (tree == null || !cubeql.hasCubeInQuery()) {
return;
}
final CubeInterface cube = cubeql.getCube();
HQLParser.bft(tree, new HQLParser.ASTNodeVisitor() {
@Override
public void visit(HQLParser.TreeNode treeNode) throws LensException {
ASTNode astNode = treeNode.getNode();
if (astNode.getToken().getType() == HiveParser.DOT) {
// At this point alias replacer has run, so all columns are of the type table.column name
ASTNode aliasNode = HQLParser.findNodeByPath((ASTNode) astNode.getChild(0), HiveParser.Identifier);
String tabName = aliasNode.getText().toLowerCase().trim();
ASTNode colNode = (ASTNode) astNode.getChild(1);
String colName = colNode.getText().toLowerCase().trim();
// Check if table is a join chain
if (cubeql.getJoinchains().containsKey(tabName)) {
// this 'tabName' is a join chain, so add all source columns
chainSourceColumns.addAll(cubeql.getJoinchains().get(tabName).getSourceColumns());
nonQueryableColumns.add(tabName + "." + colName);
} else if (tabName.equalsIgnoreCase(cubeql.getAliasForTableName(cube.getName()))) {
if (cube.getDimAttributeNames().contains(colName)) {
// Alternatively, check if this is a dimension attribute, if yes add it to the dim attribute set
// and non queryable fields set
nonQueryableColumns.add(colName);
// If this is a referenced dim attribute leading to a chain, then instead of adding this
// column, we add the source columns of the chain.
if (cube.getDimAttributeByName(colName) instanceof ReferencedDimAttribute) {
ReferencedDimAttribute rdim = (ReferencedDimAttribute) cube.getDimAttributeByName(colName);
for (ChainRefCol refCol : rdim.getChainRefColumns()) {
chainSourceColumns.addAll(cube.getChainByName(refCol.getChainName()).getSourceColumns());
}
} else {
// This is a dim attribute, needs to be validated
dimAttributes.add(colName);
}
} else if (cube.getExpressionNames().contains(colName)) {
if (cubeql.getQueriedExprs().contains(colName)) {
for (ASTNode exprNode : cubeql.getExprCtx().getExpressionContext(colName,
cubeql.getAliasForTableName(cubeql.getCube().getName())).getAllASTNodes()) {
findDimAttrsAndChainSourceColumns(cubeql, exprNode, dimAttributes, chainSourceColumns,
nonQueryableColumns);
}
}
}
}
}
}
});
}
}