blob: 4237e50dc4737c7ad18b4ee04b7556648b8fa066 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.planning;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.JoinDataSource;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.UnionDataSource;
import org.apache.druid.query.spec.QuerySegmentSpec;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
/**
* Analysis of a datasource for purposes of deciding how to execute a particular query.
*
* The analysis breaks a datasource down in the following way:
*
* <pre>
*
*                             Q  <-- Possible outer query datasource(s) [may be multiple stacked]
*                             |
*                             J  <-- Possible join tree, expected to be left-leaning
*                            / \
*                           J  Dj <--  Other leaf datasources
*   Base datasource        / \         which will be joined
*  (bottom-leftmost) -->  Db Dj  <---- into the base datasource
*
* </pre>
*
* The base datasource (Db) is returned by {@link #getBaseDataSource()}. The other leaf datasources are returned by
* {@link #getPreJoinableClauses()}. The outer query datasources are available as part of {@link #getDataSource()},
* which just returns the original datasource that was provided for analysis.
*
* The base datasource (Db) will never be a join, but it can be any other type of datasource (table, query, etc).
* Note that join trees are only flattened if they occur at the top of the overall tree (or underneath an outer query),
* and that join trees are only flattened to the degree that they are left-leaning. Due to these facts, it is possible
* for the base or leaf datasources to include additional joins.
*
* The base datasource is the one that will be considered by the core Druid query stack for scanning via
* {@link org.apache.druid.segment.Segment} and {@link org.apache.druid.segment.StorageAdapter}. The other leaf
* datasources must be joinable onto the base data.
*
* The idea here is to keep things simple and dumb. So we focus only on identifying left-leaning join trees, which map
* neatly onto a series of hash table lookups at query time. The user/system generating the queries, e.g. the druid-sql
* layer (or the end user in the case of native queries), is responsible for containing the smarts to structure the
* tree in a way that will lead to optimal execution.
*/
public class DataSourceAnalysis
{
  // The datasource originally submitted for analysis, including any outer query datasources.
  private final DataSource dataSource;

  // The bottom-leftmost datasource. Never a JoinDataSource; the constructor enforces this.
  private final DataSource baseDataSource;

  // Segment spec taken from the innermost stripped subquery; null when no outer query datasource was present.
  @Nullable
  private final QuerySegmentSpec baseQuerySegmentSpec;

  // Read-only view of the join clauses, ordered the way they are to be applied (bottom of the tree first).
  private final List<PreJoinableClause> preJoinableClauses;

  /**
   * Private constructor; instances are created through {@link #forDataSource(DataSource)}.
   *
   * @param dataSource           the original datasource that was analyzed
   * @param baseDataSource       the bottom-leftmost datasource; must not be a {@link JoinDataSource}
   * @param baseQuerySegmentSpec segment spec of the innermost subquery, or null if there was none
   * @param preJoinableClauses   join clauses in application order; must not be null
   *
   * @throws IAE if baseDataSource is a {@link JoinDataSource}
   */
  private DataSourceAnalysis(
      DataSource dataSource,
      DataSource baseDataSource,
      @Nullable QuerySegmentSpec baseQuerySegmentSpec,
      List<PreJoinableClause> preJoinableClauses
  )
  {
    if (baseDataSource instanceof JoinDataSource) {
      // The base cannot be a join (this is a class invariant).
      // If it happens, it's a bug in the datasource analyzer.
      throw new IAE("Base dataSource cannot be a join! Original dataSource was: %s", dataSource);
    }

    this.dataSource = dataSource;
    this.baseDataSource = baseDataSource;
    this.baseQuerySegmentSpec = baseQuerySegmentSpec;
    // Wrap the list so that callers of getPreJoinableClauses() cannot mutate the state that backs
    // equals(), hashCode() and isConcreteBased().
    this.preJoinableClauses = Collections.unmodifiableList(preJoinableClauses);
  }

  /**
   * Analyzes a datasource: strips any stacked outer query datasources, then flattens the join tree (if any) found
   * underneath them into a base datasource plus a list of join clauses.
   *
   * @param dataSource the datasource to analyze
   *
   * @return the analysis of the provided datasource
   *
   * @throws IAE if an outer query datasource wraps a query that is not a {@link BaseQuery}
   */
  public static DataSourceAnalysis forDataSource(final DataSource dataSource)
  {
    // Strip outer queries, retaining querySegmentSpecs as we go down (lowest will become the 'baseQuerySegmentSpec').
    QuerySegmentSpec baseQuerySegmentSpec = null;
    DataSource current = dataSource;

    while (current instanceof QueryDataSource) {
      final Query<?> subQuery = ((QueryDataSource) current).getQuery();

      if (!(subQuery instanceof BaseQuery)) {
        // All builtin query types are BaseQuery, so we only expect this with funky extension queries.
        throw new IAE("Cannot analyze subquery of class[%s]", subQuery.getClass().getName());
      }

      baseQuerySegmentSpec = ((BaseQuery<?>) subQuery).getQuerySegmentSpec();
      current = subQuery.getDataSource();
    }

    if (current instanceof JoinDataSource) {
      final Pair<DataSource, List<PreJoinableClause>> flattened = flattenJoin((JoinDataSource) current);
      return new DataSourceAnalysis(dataSource, flattened.lhs, baseQuerySegmentSpec, flattened.rhs);
    } else {
      return new DataSourceAnalysis(dataSource, current, baseQuerySegmentSpec, Collections.emptyList());
    }
  }

  /**
   * Flatten a join datasource into two parts: the left-hand side datasource (the 'base' datasource), and a list of
   * join clauses. Only the left spine of the join tree is walked; right-hand sides are captured as-is inside the
   * returned clauses.
   *
   * @param dataSource the join datasource at the top of the tree to flatten
   *
   * @return a pair of (first non-join datasource found by following left children, join clauses ordered from the
   * bottom of the tree to the top)
   */
  private static Pair<DataSource, List<PreJoinableClause>> flattenJoin(final JoinDataSource dataSource)
  {
    DataSource current = dataSource;
    final List<PreJoinableClause> preJoinableClauses = new ArrayList<>();

    while (current instanceof JoinDataSource) {
      final JoinDataSource joinDataSource = (JoinDataSource) current;
      current = joinDataSource.getLeft();
      preJoinableClauses.add(
          new PreJoinableClause(
              joinDataSource.getRightPrefix(),
              joinDataSource.getRight(),
              joinDataSource.getJoinType(),
              joinDataSource.getConditionAnalysis()
          )
      );
    }

    // Join clauses were added in the order we saw them while traversing down, but we need to apply them in the
    // going-up order. So reverse them.
    Collections.reverse(preJoinableClauses);

    return Pair.of(current, preJoinableClauses);
  }

  /**
   * Returns the topmost datasource: the original one passed to {@link #forDataSource(DataSource)}.
   */
  public DataSource getDataSource()
  {
    return dataSource;
  }

  /**
   * Returns the base (bottom-leftmost) datasource.
   */
  public DataSource getBaseDataSource()
  {
    return baseDataSource;
  }

  /**
   * Returns the same datasource as {@link #getBaseDataSource()}, but only if it is a table. Useful on data servers,
   * since they generally can only handle queries where the base datasource is a table.
   */
  public Optional<TableDataSource> getBaseTableDataSource()
  {
    if (baseDataSource instanceof TableDataSource) {
      return Optional.of((TableDataSource) baseDataSource);
    } else {
      return Optional.empty();
    }
  }

  /**
   * Returns the {@link QuerySegmentSpec} that is associated with the base datasource, if any. This only happens
   * when there is an outer query datasource. In this case, the base querySegmentSpec is the one associated with the
   * innermost subquery.
   */
  public Optional<QuerySegmentSpec> getBaseQuerySegmentSpec()
  {
    return Optional.ofNullable(baseQuerySegmentSpec);
  }

  /**
   * Returns join clauses corresponding to joinable leaf datasources (every leaf except the bottom-leftmost).
   * The returned list is unmodifiable.
   */
  public List<PreJoinableClause> getPreJoinableClauses()
  {
    return preJoinableClauses;
  }

  /**
   * Returns true if all servers have the ability to compute this datasource. These datasources depend only on
   * globally broadcast data, like lookups or inline data.
   */
  public boolean isGlobal()
  {
    return dataSource.isGlobal();
  }

  /**
   * Returns true if this datasource can be computed by the core Druid query stack via a scan of a concrete base
   * datasource. All other datasources involved, if any, must be global.
   */
  public boolean isConcreteBased()
  {
    return baseDataSource.isConcrete()
           && preJoinableClauses.stream().allMatch(clause -> clause.getDataSource().isGlobal());
  }

  /**
   * Returns true if this datasource is concrete-based (see {@link #isConcreteBased()}), and the base datasource is a
   * 'table' or union of them. This is an important property because it corresponds to datasources that can be handled
   * by Druid data servers, like Historicals.
   */
  public boolean isConcreteTableBased()
  {
    // At the time of writing this comment, UnionDataSource children are required to be tables, so the instanceof
    // check is redundant. But in the future, we will likely want to support unions of things other than tables,
    // so check anyway for future-proofing.
    return isConcreteBased() && (baseDataSource instanceof TableDataSource
                                 || (baseDataSource instanceof UnionDataSource &&
                                     baseDataSource.getChildren()
                                                   .stream()
                                                   .allMatch(ds -> ds instanceof TableDataSource)));
  }

  /**
   * Returns true if this datasource represents a subquery, i.e. the original (topmost) datasource is a
   * {@link QueryDataSource}.
   */
  public boolean isQuery()
  {
    return dataSource instanceof QueryDataSource;
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    DataSourceAnalysis that = (DataSourceAnalysis) o;
    return Objects.equals(dataSource, that.dataSource) &&
           Objects.equals(baseDataSource, that.baseDataSource) &&
           Objects.equals(baseQuerySegmentSpec, that.baseQuerySegmentSpec) &&
           Objects.equals(preJoinableClauses, that.preJoinableClauses);
  }

  @Override
  public int hashCode()
  {
    return Objects.hash(dataSource, baseDataSource, baseQuerySegmentSpec, preJoinableClauses);
  }

  @Override
  public String toString()
  {
    return "DataSourceAnalysis{" +
           "dataSource=" + dataSource +
           ", baseDataSource=" + baseDataSource +
           ", baseQuerySegmentSpec=" + baseQuerySegmentSpec +
           ", joinClauses=" + preJoinableClauses +
           '}';
  }
}