/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.base;
import com.fasterxml.jackson.annotation.JsonIgnore;
import java.util.Collection;
import java.util.List;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.expr.fn.FunctionImplementationRegistry;
import org.apache.drill.exec.metastore.analyze.AnalyzeInfoProvider;
import org.apache.drill.exec.ops.UdfUtilities;
import org.apache.drill.exec.physical.PhysicalOperatorSetupException;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.metastore.metadata.TableMetadata;
import org.apache.drill.metastore.metadata.TableMetadataProvider;
import com.google.common.collect.ImmutableList;
import org.apache.hadoop.fs.Path;
/**
* A GroupScan operator represents all data which will be scanned by a given physical
* plan. It is the superset of all SubScans for the plan.
*/
public interface GroupScan extends Scan, HasAffinity {

  /**
   * Columns list in GroupScan:
   * <ol>
   *   <li>an empty column list is used for a skip-all query;</li>
   *   <li>{@code null} is interpreted as ALL_COLUMNS.</li>
   * </ol>
   * How to handle a skip-all query is up to each storage plugin, with a different policy
   * in the corresponding RecordReader.
   */
  List<SchemaPath> ALL_COLUMNS = ImmutableList.of(SchemaPath.STAR_COLUMN);

  /**
   * Informs this group scan of the endpoints its minor fragments will run on, so that the
   * scan work can be divided among them before {@link #getSpecificScan(int)} is called.
   *
   * @param endpoints list of drillbit endpoints, one entry per minor fragment
   * @throws PhysicalOperatorSetupException if the work cannot be assigned to the endpoints
   */
  void applyAssignments(List<DrillbitEndpoint> endpoints) throws PhysicalOperatorSetupException;

  /**
   * Returns the {@link SubScan} describing the portion of this group scan's data that the
   * given minor fragment should read.
   *
   * @param minorFragmentId id of the minor fragment requesting its scan
   * @return the sub scan for that minor fragment
   * @throws ExecutionSetupException if the specific scan cannot be created
   */
  SubScan getSpecificScan(int minorFragmentId) throws ExecutionSetupException;

  /**
   * Returns the maximum number of minor fragments this group scan can be parallelized into.
   *
   * @return the maximum parallelization width
   */
  @JsonIgnore
  int getMaxParallelizationWidth();

  /**
   * Returns whether this group scan can be distributed across multiple drillbits.
   * NOTE(review): semantics inferred from the method name — confirm against implementations.
   */
  @JsonIgnore
  boolean isDistributed();

  /**
   * At minimum, the GroupScan requires these many fragments to run.
   * Currently, this is used in {@link org.apache.drill.exec.planner.fragment.SimpleParallelizer}
   *
   * @return the minimum number of fragments that should run
   */
  @JsonIgnore
  int getMinParallelizationWidth();

  /**
   * Check if GroupScan enforces width to be maximum parallelization width.
   * Currently, this is used in {@link org.apache.drill.exec.planner.physical.visitor.ExcessiveExchangeIdentifier}
   *
   * @return if maximum width should be enforced
   *
   * @deprecated Use {@link #getMinParallelizationWidth()} to determine whether this GroupScan spans more than one
   * fragment.
   */
  @JsonIgnore
  @Deprecated
  boolean enforceWidth();

  /**
   * Returns a signature of the {@link GroupScan} which should usually be composed of
   * all its attributes which could describe it uniquely.
   *
   * @return a digest string uniquely describing this scan
   */
  @JsonIgnore
  String getDigest();

  /**
   * Returns statistics (e.g. row count and cost figures) for this scan, computed using the
   * given planner settings.
   *
   * @param settings planner settings used when computing the statistics
   * @return scan statistics for planning
   */
  @JsonIgnore
  ScanStats getScanStats(PlannerSettings settings);

  /**
   * Returns statistics for this scan, computed using the given Calcite metadata query.
   *
   * @param mq Calcite relational metadata query
   * @return scan statistics for planning
   */
  @JsonIgnore
  ScanStats getScanStats(RelMetadataQuery mq);

  /**
   * Returns a clone of the GroupScan instance, except that the new GroupScan will use the
   * provided list of columns.
   *
   * @param columns columns the cloned scan should project
   * @return a copy of this scan restricted to the given columns
   */
  GroupScan clone(List<SchemaPath> columns);

  /**
   * GroupScan should check the list of columns, and see if it could support all the columns
   * in the list.
   *
   * @param columns candidate columns for projection push-down
   * @return true if all the given columns can be pushed down into the scan
   */
  boolean canPushdownProjects(List<SchemaPath> columns);

  /**
   * Returns the number of non-null values in the specified column. Raises an exception if the
   * group scan does not have an exact column row count.
   *
   * @param column column whose non-null value count is requested
   * @return the exact number of non-null values in the column
   */
  long getColumnValueCount(SchemaPath column);

  /**
   * Whether or not this GroupScan supports pushdown of partition filters
   * (directories for filesystems).
   */
  boolean supportsPartitionFilterPushdown();

  /**
   * Returns a list of columns scanned by this group scan.
   */
  List<SchemaPath> getColumns();

  /**
   * Returns a list of columns that can be used for partition pruning.
   */
  @JsonIgnore
  List<SchemaPath> getPartitionColumns();

  /**
   * Whether or not this GroupScan supports limit pushdown.
   */
  boolean supportsLimitPushdown();

  /**
   * Apply rowcount based prune for "LIMIT n" query.
   *
   * @param maxRecords the number of rows requested from group scan
   * @return a new instance of group scan if the prune is successful;
   *         null when either row-based prune is not supported, or the prune is not successful
   */
  GroupScan applyLimit(int maxRecords);

  /**
   * Return true if this GroupScan can return its selection as a list of file names
   * (retrieved by getFiles()).
   */
  @JsonIgnore
  boolean hasFiles();

  /**
   * Returns path to the selection root. If this GroupScan cannot provide selection root,
   * it returns null.
   *
   * @return path to the selection root
   */
  Path getSelectionRoot();

  /**
   * Returns a collection of file names associated with this GroupScan. This should be called
   * after checking hasFiles(). If this GroupScan cannot provide file names, it returns null.
   *
   * @return collection of files paths
   */
  Collection<Path> getFiles();

  /**
   * Returns the filter expression associated with this group scan.
   * NOTE(review): presumably the filter previously pushed down via
   * {@link #applyFilter} — confirm against implementations.
   */
  @JsonIgnore
  LogicalExpression getFilter();

  /**
   * Attempts to push the given filter expression down into this scan.
   *
   * @param filterExpr filter expression to push down
   * @param udfUtilities utilities for UDF evaluation during constant folding
   * @param functionImplementationRegistry registry used to resolve function implementations
   * @param optionManager session/system options influencing the pushdown
   * @return a new group scan with the filter applied; implementations may return null when
   *         the filter cannot be applied (TODO confirm — contract not visible here)
   */
  GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtilities,
      FunctionImplementationRegistry functionImplementationRegistry, OptionManager optionManager);

  /**
   * Returns {@link TableMetadataProvider} instance which is used for providing metadata
   * for current {@link GroupScan}.
   *
   * @return {@link TableMetadataProvider} instance the source of metadata
   */
  @JsonIgnore
  TableMetadataProvider getMetadataProvider();

  /**
   * Returns the {@link TableMetadata} for the table this group scan reads.
   */
  @JsonIgnore
  TableMetadata getTableMetadata();

  /**
   * Returns {@code true} if current group scan uses metadata obtained from the Metastore.
   *
   * @return {@code true} if current group scan uses metadata obtained from the Metastore,
   *         {@code false} otherwise.
   */
  @JsonIgnore
  boolean usedMetastore();

  /**
   * Returns {@link AnalyzeInfoProvider} instance which will be used when running
   * ANALYZE statement.
   *
   * @return {@link AnalyzeInfoProvider} instance
   */
  @JsonIgnore
  AnalyzeInfoProvider getAnalyzeInfoProvider();

  /**
   * Checks whether this group scan supports filter push down.
   *
   * @return {@code true} if this group scan supports filter push down,
   *         {@code false} otherwise
   */
  @JsonIgnore
  boolean supportsFilterPushDown();
}