processing/src/main/java/org/apache/druid/query/planning/DataSourceAnalysis.java - druid - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.druid.query.planning;

 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.Pair;
 import org.apache.druid.query.BaseQuery;
 import org.apache.druid.query.DataSource;
 import org.apache.druid.query.JoinDataSource;
 import org.apache.druid.query.Query;
 import org.apache.druid.query.QueryDataSource;
 import org.apache.druid.query.TableDataSource;
 import org.apache.druid.query.UnionDataSource;
 import org.apache.druid.query.spec.QuerySegmentSpec;

 import javax.annotation.Nullable;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Objects;
 import java.util.Optional;

 /**
  * Analysis of a datasource for purposes of deciding how to execute a particular query.
  *
  * The analysis breaks a datasource down in the following way:
  *
  * <pre>
  *
  *                             Q  <-- Possible outer query datasource(s) [may be multiple stacked]
  *                             |
  *                             J  <-- Possible join tree, expected to be left-leaning
  *                            / \
  *                           J  Dj <--  Other leaf datasources
  *   Base datasource        / \         which will be joined
  *  (bottom-leftmost) -->  Db Dj  <---- into the base datasource
  *
  * </pre>
  *
  * The base datasource (Db) is returned by {@link #getBaseDataSource()}. The other leaf datasources are returned by
  * {@link #getPreJoinableClauses()}. The outer query datasources are available as part of {@link #getDataSource()},
  * which just returns the original datasource that was provided for analysis.
  *
  * The base datasource (Db) will never be a join, but it can be any other type of datasource (table, query, etc).
  * Note that join trees are only flattened if they occur at the top of the overall tree (or underneath an outer query),
  * and that join trees are only flattened to the degree that they are left-leaning. Due to these facts, it is possible
  * for the base or leaf datasources to include additional joins.
  *
  * The base datasource is the one that will be considered by the core Druid query stack for scanning via
  * {@link org.apache.druid.segment.Segment} and {@link org.apache.druid.segment.StorageAdapter}. The other leaf
  * datasources must be joinable onto the base data.
  *
  * The idea here is to keep things simple and dumb. So we focus only on identifying left-leaning join trees, which map
  * neatly onto a series of hash table lookups at query time. The user/system generating the queries, e.g. the druid-sql
  * layer (or the end user in the case of native queries), is responsible for containing the smarts to structure the
  * tree in a way that will lead to optimal execution.
  */
 public class DataSourceAnalysis
 {
   private final DataSource dataSource;
   private final DataSource baseDataSource;
   @Nullable
   private final QuerySegmentSpec baseQuerySegmentSpec;
   private final List<PreJoinableClause> preJoinableClauses;

   private DataSourceAnalysis(
       DataSource dataSource,
       DataSource baseDataSource,
       @Nullable QuerySegmentSpec baseQuerySegmentSpec,
       List<PreJoinableClause> preJoinableClauses
   )
   {
     if (baseDataSource instanceof JoinDataSource) {
       // The base cannot be a join (this is a class invariant).
       // If it happens, it's a bug in the datasource analyzer.
       throw new IAE("Base dataSource cannot be a join! Original dataSource was: %s", dataSource);
     }

     this.dataSource = dataSource;
     this.baseDataSource = baseDataSource;
     this.baseQuerySegmentSpec = baseQuerySegmentSpec;
     this.preJoinableClauses = preJoinableClauses;
   }

   public static DataSourceAnalysis forDataSource(final DataSource dataSource)
   {
     // Strip outer queries, retaining querySegmentSpecs as we go down (lowest will become the 'baseQuerySegmentSpec').
     QuerySegmentSpec baseQuerySegmentSpec = null;
     DataSource current = dataSource;

     while (current instanceof QueryDataSource) {
       final Query<?> subQuery = ((QueryDataSource) current).getQuery();

       if (!(subQuery instanceof BaseQuery)) {
         // All builtin query types are BaseQuery, so we only expect this with funky extension queries.
         throw new IAE("Cannot analyze subquery of class[%s]", subQuery.getClass().getName());
       }

       baseQuerySegmentSpec = ((BaseQuery<?>) subQuery).getQuerySegmentSpec();
       current = subQuery.getDataSource();
     }

     if (current instanceof JoinDataSource) {
       final Pair<DataSource, List<PreJoinableClause>> flattened = flattenJoin((JoinDataSource) current);
       return new DataSourceAnalysis(dataSource, flattened.lhs, baseQuerySegmentSpec, flattened.rhs);
     } else {
       return new DataSourceAnalysis(dataSource, current, baseQuerySegmentSpec, Collections.emptyList());
     }
   }

   /**
    * Flatten a datasource into two parts: the left-hand side datasource (the 'base' datasource), and a list of join
    * clauses, if any.
    *
    * @throws IllegalArgumentException if dataSource cannot be fully flattened.
    */
   private static Pair<DataSource, List<PreJoinableClause>> flattenJoin(final JoinDataSource dataSource)
   {
     DataSource current = dataSource;
     final List<PreJoinableClause> preJoinableClauses = new ArrayList<>();

     while (current instanceof JoinDataSource) {
       final JoinDataSource joinDataSource = (JoinDataSource) current;
       current = joinDataSource.getLeft();
       preJoinableClauses.add(
           new PreJoinableClause(
               joinDataSource.getRightPrefix(),
               joinDataSource.getRight(),
               joinDataSource.getJoinType(),
               joinDataSource.getConditionAnalysis()
           )
       );
     }

     // Join clauses were added in the order we saw them while traversing down, but we need to apply them in the
     // going-up order. So reverse them.
     Collections.reverse(preJoinableClauses);

     return Pair.of(current, preJoinableClauses);
   }

   /**
    * Returns the topmost datasource: the original one passed to {@link #forDataSource(DataSource)}.
    */
   public DataSource getDataSource()
   {
     return dataSource;
   }

   /**
    * Returns the base (bottom-leftmost) datasource.
    */
   public DataSource getBaseDataSource()
   {
     return baseDataSource;
   }

   /**
    * Returns the same datasource as {@link #getBaseDataSource()}, but only if it is a table. Useful on data servers,
    * since they generally can only handle queries where the base datasource is a table.
    */
   public Optional<TableDataSource> getBaseTableDataSource()
   {
     if (baseDataSource instanceof TableDataSource) {
       return Optional.of((TableDataSource) baseDataSource);
     } else {
       return Optional.empty();
     }
   }

   /**
    * Returns the {@link QuerySegmentSpec} that is associated with the base datasource, if any. This only happens
    * when there is an outer query datasource. In this case, the base querySegmentSpec is the one associated with the
    * innermost subquery.
    */
   public Optional<QuerySegmentSpec> getBaseQuerySegmentSpec()
   {
     return Optional.ofNullable(baseQuerySegmentSpec);
   }

   /**
    * Returns join clauses corresponding to joinable leaf datasources (every leaf except the bottom-leftmost).
    */
   public List<PreJoinableClause> getPreJoinableClauses()
   {
     return preJoinableClauses;
   }

   /**
    * Returns true if all servers have the ability to compute this datasource. These datasources depend only on
    * globally broadcast data, like lookups or inline data.
    */
   public boolean isGlobal()
   {
     return dataSource.isGlobal();
   }

   /**
    * Returns true if this datasource can be computed by the core Druid query stack via a scan of a concrete base
    * datasource. All other datasources involved, if any, must be global.
    */
   public boolean isConcreteBased()
   {
     return baseDataSource.isConcrete() && preJoinableClauses.stream()
                                                             .allMatch(clause -> clause.getDataSource().isGlobal());
   }

   /**
    * Returns true if this datasource is concrete-based (see {@link #isConcreteBased()}, and the base datasource is a
    * 'table' or union of them. This is an important property because it corresponds to datasources that can be handled
    * by Druid data servers, like Historicals.
    */
   public boolean isConcreteTableBased()
   {
     // At the time of writing this comment, UnionDataSource children are required to be tables, so the instanceof
     // check is redundant. But in the future, we will likely want to support unions of things other than tables,
     // so check anyway for future-proofing.
     return isConcreteBased() && (baseDataSource instanceof TableDataSource
                                  || (baseDataSource instanceof UnionDataSource &&
                                      baseDataSource.getChildren()
                                                    .stream()
                                                    .allMatch(ds -> ds instanceof TableDataSource)));
   }

   /**
    * Returns true if this datasource represents a subquery.
    */
   public boolean isQuery()
   {
     return dataSource instanceof QueryDataSource;
   }

   @Override
   public boolean equals(Object o)
   {
     if (this == o) {
       return true;
     }
     if (o == null || getClass() != o.getClass()) {
       return false;
     }
     DataSourceAnalysis that = (DataSourceAnalysis) o;
     return Objects.equals(dataSource, that.dataSource) &&
            Objects.equals(baseDataSource, that.baseDataSource) &&
            Objects.equals(baseQuerySegmentSpec, that.baseQuerySegmentSpec) &&
            Objects.equals(preJoinableClauses, that.preJoinableClauses);
   }

   @Override
   public int hashCode()
   {
     return Objects.hash(dataSource, baseDataSource, baseQuerySegmentSpec, preJoinableClauses);
   }

   @Override
   public String toString()
   {
     return "DataSourceAnalysis{" +
            "dataSource=" + dataSource +
            ", baseDataSource=" + baseDataSource +
            ", baseQuerySegmentSpec=" + baseQuerySegmentSpec +
            ", joinClauses=" + preJoinableClauses +
            '}';
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.druid.query.planning;

	import org.apache.druid.java.util.common.IAE;
	import org.apache.druid.java.util.common.Pair;
	import org.apache.druid.query.BaseQuery;
	import org.apache.druid.query.DataSource;
	import org.apache.druid.query.JoinDataSource;
	import org.apache.druid.query.Query;
	import org.apache.druid.query.QueryDataSource;
	import org.apache.druid.query.TableDataSource;
	import org.apache.druid.query.UnionDataSource;
	import org.apache.druid.query.spec.QuerySegmentSpec;

	import javax.annotation.Nullable;
	import java.util.ArrayList;
	import java.util.Collections;
	import java.util.List;
	import java.util.Objects;
	import java.util.Optional;

	/**
	* Analysis of a datasource for purposes of deciding how to execute a particular query.
	*
	* The analysis breaks a datasource down in the following way:
	*
	* <pre>
	*
	* Q <-- Possible outer query datasource(s) [may be multiple stacked]
	* \|
	* J <-- Possible join tree, expected to be left-leaning
	* / \
	* J Dj <-- Other leaf datasources
	* Base datasource / \ which will be joined
	* (bottom-leftmost) --> Db Dj <---- into the base datasource
	*
	* </pre>
	*
	* The base datasource (Db) is returned by {@link #getBaseDataSource()}. The other leaf datasources are returned by
	* {@link #getPreJoinableClauses()}. The outer query datasources are available as part of {@link #getDataSource()},
	* which just returns the original datasource that was provided for analysis.
	*
	* The base datasource (Db) will never be a join, but it can be any other type of datasource (table, query, etc).
	* Note that join trees are only flattened if they occur at the top of the overall tree (or underneath an outer query),
	* and that join trees are only flattened to the degree that they are left-leaning. Due to these facts, it is possible
	* for the base or leaf datasources to include additional joins.
	*
	* The base datasource is the one that will be considered by the core Druid query stack for scanning via
	* {@link org.apache.druid.segment.Segment} and {@link org.apache.druid.segment.StorageAdapter}. The other leaf
	* datasources must be joinable onto the base data.
	*
	* The idea here is to keep things simple and dumb. So we focus only on identifying left-leaning join trees, which map
	* neatly onto a series of hash table lookups at query time. The user/system generating the queries, e.g. the druid-sql
	* layer (or the end user in the case of native queries), is responsible for containing the smarts to structure the
	* tree in a way that will lead to optimal execution.
	*/
	public class DataSourceAnalysis
	{
	private final DataSource dataSource;
	private final DataSource baseDataSource;
	@Nullable
	private final QuerySegmentSpec baseQuerySegmentSpec;
	private final List<PreJoinableClause> preJoinableClauses;

	private DataSourceAnalysis(
	DataSource dataSource,
	DataSource baseDataSource,
	@Nullable QuerySegmentSpec baseQuerySegmentSpec,
	List<PreJoinableClause> preJoinableClauses
	)
	{
	if (baseDataSource instanceof JoinDataSource) {
	// The base cannot be a join (this is a class invariant).
	// If it happens, it's a bug in the datasource analyzer.
	throw new IAE("Base dataSource cannot be a join! Original dataSource was: %s", dataSource);
	}

	this.dataSource = dataSource;
	this.baseDataSource = baseDataSource;
	this.baseQuerySegmentSpec = baseQuerySegmentSpec;
	this.preJoinableClauses = preJoinableClauses;
	}

	public static DataSourceAnalysis forDataSource(final DataSource dataSource)
	{
	// Strip outer queries, retaining querySegmentSpecs as we go down (lowest will become the 'baseQuerySegmentSpec').
	QuerySegmentSpec baseQuerySegmentSpec = null;
	DataSource current = dataSource;

	while (current instanceof QueryDataSource) {
	final Query<?> subQuery = ((QueryDataSource) current).getQuery();

	if (!(subQuery instanceof BaseQuery)) {
	// All builtin query types are BaseQuery, so we only expect this with funky extension queries.
	throw new IAE("Cannot analyze subquery of class[%s]", subQuery.getClass().getName());
	}

	baseQuerySegmentSpec = ((BaseQuery<?>) subQuery).getQuerySegmentSpec();
	current = subQuery.getDataSource();
	}

	if (current instanceof JoinDataSource) {
	final Pair<DataSource, List<PreJoinableClause>> flattened = flattenJoin((JoinDataSource) current);
	return new DataSourceAnalysis(dataSource, flattened.lhs, baseQuerySegmentSpec, flattened.rhs);
	} else {
	return new DataSourceAnalysis(dataSource, current, baseQuerySegmentSpec, Collections.emptyList());
	}
	}

	/**
	* Flatten a datasource into two parts: the left-hand side datasource (the 'base' datasource), and a list of join
	* clauses, if any.
	*
	* @throws IllegalArgumentException if dataSource cannot be fully flattened.
	*/
	private static Pair<DataSource, List<PreJoinableClause>> flattenJoin(final JoinDataSource dataSource)
	{
	DataSource current = dataSource;
	final List<PreJoinableClause> preJoinableClauses = new ArrayList<>();

	while (current instanceof JoinDataSource) {
	final JoinDataSource joinDataSource = (JoinDataSource) current;
	current = joinDataSource.getLeft();
	preJoinableClauses.add(
	new PreJoinableClause(
	joinDataSource.getRightPrefix(),
	joinDataSource.getRight(),
	joinDataSource.getJoinType(),
	joinDataSource.getConditionAnalysis()
	)
	);
	}

	// Join clauses were added in the order we saw them while traversing down, but we need to apply them in the
	// going-up order. So reverse them.
	Collections.reverse(preJoinableClauses);

	return Pair.of(current, preJoinableClauses);
	}

	/**
	* Returns the topmost datasource: the original one passed to {@link #forDataSource(DataSource)}.
	*/
	public DataSource getDataSource()
	{
	return dataSource;
	}

	/**
	* Returns the base (bottom-leftmost) datasource.
	*/
	public DataSource getBaseDataSource()
	{
	return baseDataSource;
	}

	/**
	* Returns the same datasource as {@link #getBaseDataSource()}, but only if it is a table. Useful on data servers,
	* since they generally can only handle queries where the base datasource is a table.
	*/
	public Optional<TableDataSource> getBaseTableDataSource()
	{
	if (baseDataSource instanceof TableDataSource) {
	return Optional.of((TableDataSource) baseDataSource);
	} else {
	return Optional.empty();
	}
	}

	/**
	* Returns the {@link QuerySegmentSpec} that is associated with the base datasource, if any. This only happens
	* when there is an outer query datasource. In this case, the base querySegmentSpec is the one associated with the
	* innermost subquery.
	*/
	public Optional<QuerySegmentSpec> getBaseQuerySegmentSpec()
	{
	return Optional.ofNullable(baseQuerySegmentSpec);
	}

	/**
	* Returns join clauses corresponding to joinable leaf datasources (every leaf except the bottom-leftmost).
	*/
	public List<PreJoinableClause> getPreJoinableClauses()
	{
	return preJoinableClauses;
	}

	/**
	* Returns true if all servers have the ability to compute this datasource. These datasources depend only on
	* globally broadcast data, like lookups or inline data.
	*/
	public boolean isGlobal()
	{
	return dataSource.isGlobal();
	}

	/**
	* Returns true if this datasource can be computed by the core Druid query stack via a scan of a concrete base
	* datasource. All other datasources involved, if any, must be global.
	*/
	public boolean isConcreteBased()
	{
	return baseDataSource.isConcrete() && preJoinableClauses.stream()
	.allMatch(clause -> clause.getDataSource().isGlobal());
	}

	/**
	* Returns true if this datasource is concrete-based (see {@link #isConcreteBased()}, and the base datasource is a
	* 'table' or union of them. This is an important property because it corresponds to datasources that can be handled
	* by Druid data servers, like Historicals.
	*/
	public boolean isConcreteTableBased()
	{
	// At the time of writing this comment, UnionDataSource children are required to be tables, so the instanceof
	// check is redundant. But in the future, we will likely want to support unions of things other than tables,
	// so check anyway for future-proofing.
	return isConcreteBased() && (baseDataSource instanceof TableDataSource
	\|\| (baseDataSource instanceof UnionDataSource &&
	baseDataSource.getChildren()
	.stream()
	.allMatch(ds -> ds instanceof TableDataSource)));
	}

	/**
	* Returns true if this datasource represents a subquery.
	*/
	public boolean isQuery()
	{
	return dataSource instanceof QueryDataSource;
	}

	@Override
	public boolean equals(Object o)
	{
	if (this == o) {
	return true;
	}
	if (o == null \|\| getClass() != o.getClass()) {
	return false;
	}
	DataSourceAnalysis that = (DataSourceAnalysis) o;
	return Objects.equals(dataSource, that.dataSource) &&
	Objects.equals(baseDataSource, that.baseDataSource) &&
	Objects.equals(baseQuerySegmentSpec, that.baseQuerySegmentSpec) &&
	Objects.equals(preJoinableClauses, that.preJoinableClauses);
	}

	@Override
	public int hashCode()
	{
	return Objects.hash(dataSource, baseDataSource, baseQuerySegmentSpec, preJoinableClauses);
	}

	@Override
	public String toString()
	{
	return "DataSourceAnalysis{" +
	"dataSource=" + dataSource +
	", baseDataSource=" + baseDataSource +
	", baseQuerySegmentSpec=" + baseQuerySegmentSpec +
	", joinClauses=" + preJoinableClauses +
	'}';
	}
	}