/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.base;
import com.fasterxml.jackson.annotation.JsonIgnore;
import java.util.Collection;
import java.util.List;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.expr.fn.FunctionImplementationRegistry;
import org.apache.drill.exec.metastore.analyze.AnalyzeInfoProvider;
import org.apache.drill.exec.ops.UdfUtilities;
import org.apache.drill.exec.physical.PhysicalOperatorSetupException;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.metastore.metadata.TableMetadata;
import org.apache.drill.metastore.metadata.TableMetadataProvider;
import com.google.common.collect.ImmutableList;
import org.apache.hadoop.fs.Path;
/**
* A GroupScan operator represents all data which will be scanned by a given physical
* plan. It is the superset of all SubScans for the plan.
*/
public interface GroupScan extends Scan, HasAffinity {

  /**
   * Columns list in GroupScan:
   * <ol>
   *   <li>an empty column list is used for a skip-all query;</li>
   *   <li>{@code null} is interpreted as ALL_COLUMNS.</li>
   * </ol>
   * How to handle a skip-all query is up to each storage plugin, with a different policy
   * in the corresponding RecordReader.
   */
  List<SchemaPath> ALL_COLUMNS = ImmutableList.of(SchemaPath.STAR_COLUMN);

  /**
   * Informs this group scan of the endpoints its minor fragments will run on, so that the
   * scan work can be divided among them before {@link #getSpecificScan(int)} is called.
   *
   * @param endpoints list of drillbit endpoints, one entry per minor fragment
   * @throws PhysicalOperatorSetupException if the work cannot be assigned to the endpoints
   */
  void applyAssignments(List<DrillbitEndpoint> endpoints) throws PhysicalOperatorSetupException;

  /**
   * Returns the {@link SubScan} describing the portion of this group scan's data that the
   * given minor fragment should read.
   *
   * @param minorFragmentId id of the minor fragment requesting its scan
   * @return the sub scan for that minor fragment
   * @throws ExecutionSetupException if the specific scan cannot be created
   */
  SubScan getSpecificScan(int minorFragmentId) throws ExecutionSetupException;

  /**
   * Returns the maximum number of minor fragments this group scan can be parallelized into.
   *
   * @return the maximum parallelization width
   */
  @JsonIgnore
  int getMaxParallelizationWidth();

  /**
   * Returns whether this group scan can be distributed across multiple drillbits.
   * NOTE(review): semantics inferred from the method name — confirm against implementations.
   */
  @JsonIgnore
  boolean isDistributed();

  /**
   * At minimum, the GroupScan requires these many fragments to run.
   * Currently, this is used in {@link org.apache.drill.exec.planner.fragment.SimpleParallelizer}
   *
   * @return the minimum number of fragments that should run
   */
  @JsonIgnore
  int getMinParallelizationWidth();

  /**
   * Check if GroupScan enforces width to be maximum parallelization width.
   * Currently, this is used in {@link org.apache.drill.exec.planner.physical.visitor.ExcessiveExchangeIdentifier}
   *
   * @return if maximum width should be enforced
   *
   * @deprecated Use {@link #getMinParallelizationWidth()} to determine whether this GroupScan spans more than one
   * fragment.
   */
  @JsonIgnore
  @Deprecated
  boolean enforceWidth();

  /**
   * Returns a signature of the {@link GroupScan} which should usually be composed of
   * all its attributes which could describe it uniquely.
   *
   * @return a digest string uniquely describing this scan
   */
  @JsonIgnore
  String getDigest();

  /**
   * Returns statistics (e.g. row count and cost figures) for this scan, computed using the
   * given planner settings.
   *
   * @param settings planner settings used when computing the statistics
   * @return scan statistics for planning
   */
  @JsonIgnore
  ScanStats getScanStats(PlannerSettings settings);

  /**
   * Returns statistics for this scan, computed using the given Calcite metadata query.
   *
   * @param mq Calcite relational metadata query
   * @return scan statistics for planning
   */
  @JsonIgnore
  ScanStats getScanStats(RelMetadataQuery mq);

  /**
   * Returns a clone of the GroupScan instance, except that the new GroupScan will use the
   * provided list of columns.
   *
   * @param columns columns the cloned scan should project
   * @return a copy of this scan restricted to the given columns
   */
  GroupScan clone(List<SchemaPath> columns);

  /**
   * GroupScan should check the list of columns, and see if it could support all the columns
   * in the list.
   *
   * @param columns candidate columns for projection push-down
   * @return true if all the given columns can be pushed down into the scan
   */
  boolean canPushdownProjects(List<SchemaPath> columns);

  /**
   * Returns the number of non-null values in the specified column. Raises an exception if the
   * group scan does not have an exact column row count.
   *
   * @param column column whose non-null value count is requested
   * @return the exact number of non-null values in the column
   */
  long getColumnValueCount(SchemaPath column);

  /**
   * Whether or not this GroupScan supports pushdown of partition filters
   * (directories for filesystems).
   */
  boolean supportsPartitionFilterPushdown();

  /**
   * Returns a list of columns scanned by this group scan.
   */
  List<SchemaPath> getColumns();

  /**
   * Returns a list of columns that can be used for partition pruning.
   */
  @JsonIgnore
  List<SchemaPath> getPartitionColumns();

  /**
   * Whether or not this GroupScan supports limit pushdown.
   */
  boolean supportsLimitPushdown();

  /**
   * Apply rowcount based prune for "LIMIT n" query.
   *
   * @param maxRecords the number of rows requested from group scan
   * @return a new instance of group scan if the prune is successful;
   *         null when either row-based prune is not supported, or the prune is not successful
   */
  GroupScan applyLimit(int maxRecords);

  /**
   * Return true if this GroupScan can return its selection as a list of file names
   * (retrieved by getFiles()).
   */
  @JsonIgnore
  boolean hasFiles();

  /**
   * Returns path to the selection root. If this GroupScan cannot provide selection root,
   * it returns null.
   *
   * @return path to the selection root
   */
  Path getSelectionRoot();

  /**
   * Returns a collection of file names associated with this GroupScan. This should be called
   * after checking hasFiles(). If this GroupScan cannot provide file names, it returns null.
   *
   * @return collection of files paths
   */
  Collection<Path> getFiles();

  /**
   * Returns the filter expression associated with this group scan.
   * NOTE(review): presumably the filter previously pushed down via
   * {@link #applyFilter} — confirm against implementations.
   */
  @JsonIgnore
  LogicalExpression getFilter();

  /**
   * Attempts to push the given filter expression down into this scan.
   *
   * @param filterExpr filter expression to push down
   * @param udfUtilities utilities for UDF evaluation during constant folding
   * @param functionImplementationRegistry registry used to resolve function implementations
   * @param optionManager session/system options influencing the pushdown
   * @return a new group scan with the filter applied; implementations may return null when
   *         the filter cannot be applied (TODO confirm — contract not visible here)
   */
  GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtilities,
      FunctionImplementationRegistry functionImplementationRegistry, OptionManager optionManager);

  /**
   * Returns {@link TableMetadataProvider} instance which is used for providing metadata
   * for current {@link GroupScan}.
   *
   * @return {@link TableMetadataProvider} instance the source of metadata
   */
  @JsonIgnore
  TableMetadataProvider getMetadataProvider();

  /**
   * Returns the {@link TableMetadata} for the table this group scan reads.
   */
  @JsonIgnore
  TableMetadata getTableMetadata();

  /**
   * Returns {@code true} if current group scan uses metadata obtained from the Metastore.
   *
   * @return {@code true} if current group scan uses metadata obtained from the Metastore,
   *         {@code false} otherwise.
   */
  @JsonIgnore
  boolean usedMetastore();

  /**
   * Returns {@link AnalyzeInfoProvider} instance which will be used when running
   * ANALYZE statement.
   *
   * @return {@link AnalyzeInfoProvider} instance
   */
  @JsonIgnore
  AnalyzeInfoProvider getAnalyzeInfoProvider();

  /**
   * Checks whether this group scan supports filter push down.
   *
   * @return {@code true} if this group scan supports filter push down,
   *         {@code false} otherwise
   */
  @JsonIgnore
  boolean supportsFilterPushDown();
}