| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.druid.query.groupby.strategy; |
| |
| import org.apache.druid.java.util.common.UOE; |
| import org.apache.druid.java.util.common.guava.Sequence; |
| import org.apache.druid.query.Query; |
| import org.apache.druid.query.QueryProcessingPool; |
| import org.apache.druid.query.QueryRunner; |
| import org.apache.druid.query.QueryRunnerFactory; |
| import org.apache.druid.query.context.ResponseContext; |
| import org.apache.druid.query.groupby.GroupByQuery; |
| import org.apache.druid.query.groupby.GroupByQueryQueryToolChest; |
| import org.apache.druid.query.groupby.ResultRow; |
| import org.apache.druid.query.groupby.resource.GroupByQueryResource; |
| import org.apache.druid.segment.StorageAdapter; |
| |
| import javax.annotation.Nullable; |
| |
| import java.util.Comparator; |
| import java.util.function.BinaryOperator; |
| |
| public interface GroupByStrategy |
| { |
| /** |
| * Initializes resources required to run {@link GroupByQueryQueryToolChest#mergeResults(QueryRunner)} for a |
| * particular query. That method is also the primary caller of this method. |
| * |
| * Used by {@link GroupByQueryQueryToolChest#mergeResults(QueryRunner)}. |
| * |
| * @param query a groupBy query to be processed |
| * |
| * @return broker resource |
| */ |
| GroupByQueryResource prepareResource(GroupByQuery query); |
| |
| /** |
| * Indicates if results from this query are cacheable or not. |
| * |
| * Used by {@link GroupByQueryQueryToolChest#getCacheStrategy(GroupByQuery)}. |
| * |
| * @param willMergeRunners indicates that {@link QueryRunnerFactory#mergeRunners(QueryProcessingPool, Iterable)} will be |
| * called on the cached by-segment results. Can be used to distinguish if we are running on |
| * a broker or data node. |
| * |
| * @return true if this strategy is cacheable, otherwise false. |
| */ |
| boolean isCacheable(boolean willMergeRunners); |
| |
| /** |
| * Indicates if this query should undergo "mergeResults" or not. Checked by |
| * {@link GroupByQueryQueryToolChest#mergeResults(QueryRunner)}. |
| */ |
| boolean doMergeResults(GroupByQuery query); |
| |
| /** |
| * Runs a provided {@link QueryRunner} on a provided {@link GroupByQuery}, which is assumed to return rows that are |
| * properly sorted (by timestamp and dimensions) but not necessarily fully merged (that is, there may be adjacent |
| * rows with the same timestamp and dimensions) and without PostAggregators computed. This method will fully merge |
| * the rows, apply PostAggregators, and return the resulting {@link Sequence}. |
| * |
| * The query will be modified before passing it down to the base runner. For example, "having" clauses will be |
| * removed and various context parameters will be adjusted. |
| * |
| * Despite the similar name, this method is much reduced in scope compared to |
| * {@link GroupByQueryQueryToolChest#mergeResults(QueryRunner)}. That method does delegate to this one at some points, |
| * but has a truckload of other responsibility, including computing outer query results (if there are subqueries), |
| * computing subtotals (like GROUPING SETS), and computing the havingSpec and limitSpec. |
| * |
| * @param baseRunner base query runner |
| * @param query the groupBy query to run inside the base query runner |
| * @param responseContext the response context to pass to the base query runner |
| * |
| * @return merged result sequence |
| */ |
| Sequence<ResultRow> mergeResults( |
| QueryRunner<ResultRow> baseRunner, |
| GroupByQuery query, |
| ResponseContext responseContext |
| ); |
| |
| /** |
| * See {@link org.apache.druid.query.QueryToolChest#createMergeFn(Query)} for details, allows |
| * {@link GroupByQueryQueryToolChest} to delegate implementation to the strategy |
| */ |
| @Nullable |
| default BinaryOperator<ResultRow> createMergeFn(Query<ResultRow> query) |
| { |
| return null; |
| } |
| |
| /** |
| * See {@link org.apache.druid.query.QueryToolChest#createResultComparator(Query)}, allows |
| * {@link GroupByQueryQueryToolChest} to delegate implementation to the strategy |
| */ |
| @Nullable |
| default Comparator<ResultRow> createResultComparator(Query<ResultRow> queryParam) |
| { |
| throw new UOE("%s doesn't provide a result comparator", this.getClass().getName()); |
| } |
| |
| /** |
| * Apply the {@link GroupByQuery} "postProcessingFn", which is responsible for HavingSpec and LimitSpec. |
| * |
| * @param results sequence of results |
| * @param query the groupBy query |
| * |
| * @return post-processed results, with HavingSpec and LimitSpec applied |
| */ |
| Sequence<ResultRow> applyPostProcessing(Sequence<ResultRow> results, GroupByQuery query); |
| |
| /** |
| * Called by {@link GroupByQueryQueryToolChest#mergeResults(QueryRunner)} when it needs to process a subquery. |
| * |
| * @param subquery inner query |
| * @param query outer query |
| * @param resource resources returned by {@link #prepareResource(GroupByQuery)} |
| * @param subqueryResult result rows from the subquery |
| * @param wasQueryPushedDown true if the outer query was pushed down (so we only need to merge the outer query's |
| * results, not run it from scratch like a normal outer query) |
| * |
| * @return results of the outer query |
| */ |
| Sequence<ResultRow> processSubqueryResult( |
| GroupByQuery subquery, |
| GroupByQuery query, |
| GroupByQueryResource resource, |
| Sequence<ResultRow> subqueryResult, |
| boolean wasQueryPushedDown |
| ); |
| |
| /** |
| * Called by {@link GroupByQueryQueryToolChest#mergeResults(QueryRunner)} when it needs to generate subtotals. |
| * |
| * @param query query that has a "subtotalsSpec" |
| * @param resource resources returned by {@link #prepareResource(GroupByQuery)} |
| * @param queryResult result rows from the main query |
| * |
| * @return results for each list of subtotals in the query, concatenated together |
| */ |
| Sequence<ResultRow> processSubtotalsSpec( |
| GroupByQuery query, |
| GroupByQueryResource resource, |
| Sequence<ResultRow> queryResult |
| ); |
| |
| /** |
| * Merge a variety of single-segment query runners into a combined runner. Used by |
| * {@link org.apache.druid.query.groupby.GroupByQueryRunnerFactory#mergeRunners(QueryProcessingPool, Iterable)}. In |
| * that sense, it is intended to go along with {@link #process(GroupByQuery, StorageAdapter)} (the runners created |
| * by that method will be fed into this method). |
| * <p> |
| * This method is only called on data servers, like Historicals (not the Broker). |
| * |
| * @param queryProcessingPool {@link QueryProcessingPool} service used for parallel execution of the query runners |
| * @param queryRunners collection of query runners to merge |
| * @return merged query runner |
| */ |
| QueryRunner<ResultRow> mergeRunners(QueryProcessingPool queryProcessingPool, Iterable<QueryRunner<ResultRow>> queryRunners); |
| |
| /** |
| * Process a groupBy query on a single {@link StorageAdapter}. This is used by |
| * {@link org.apache.druid.query.groupby.GroupByQueryRunnerFactory#createRunner} to create per-segment |
| * QueryRunners. |
| * |
| * This method is only called on data servers, like Historicals (not the Broker). |
| * |
| * @param query the groupBy query |
| * @param storageAdapter storage adatper for the segment in question |
| * |
| * @return result sequence for the storage adapter |
| */ |
| Sequence<ResultRow> process(GroupByQuery query, StorageAdapter storageAdapter); |
| |
| /** |
| * Returns whether this strategy supports pushing down outer queries. This is used by |
| * {@link GroupByQueryQueryToolChest#mergeResults(QueryRunner)} when it decides whether or not to push down an |
| * outer query from the Broker to data servers, like Historicals. |
| * |
| * Can be removed when the "v1" groupBy strategy is removed. ("v1" returns false, and "v2" returns true.) |
| */ |
| boolean supportsNestedQueryPushDown(); |
| } |