exec/java-exec/src/main/java/org/apache/drill/exec/physical/base/ScanStats.java - drill - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.drill.exec.physical.base;

 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;

 /**
  * Cost estimate for a scan. In general, relative costs are more important
  * than absolute costs. If a scan supports filter push-down, the cost of
  * the scan after the push-down must be less than the combined cost of
  * the scan + project before push down, else Calcite will ignore the
  * push-down. Also, the estimated row count may influence whether the
  * table can be broadcast or hash partitioned. Otherwise, Calcite has
  * no real choices based on scan cost.
  */
 public class ScanStats {

   /**
    * Predefined stats: a non-exact row count of 20 with a CPU cost and a
    * disk cost of 1 each.
    */
   public static final ScanStats TRIVIAL_TABLE = new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, 20, 1, 1);

   /**
    * Predefined stats: an exact row count of 0 with a CPU cost and a
    * disk cost of 1 each.
    */
   public static final ScanStats ZERO_RECORD_TABLE = new ScanStats(GroupScanProperty.EXACT_ROW_COUNT, 0, 1, 1);

   /** Tells how the record count and the costs below are to be interpreted. */
   @JsonProperty
   private final GroupScanProperty groupScanProperty;

   /**
    * Number of records for the scan. Whether the value is exact is
    * reported by {@link GroupScanProperty#hasExactRowCount()}.
    */
   @JsonProperty
   private final double recordCount;

   /**
    * CPU cost for the scan which should consider both row and column
    * count, and the effect of filters. Considered only if the group scan property is
    * set to {@link GroupScanProperty#ESTIMATED_TOTAL_COST}. Default
    * CPU cost is simply row count * column count.
    */
   @JsonProperty
   private final double cpuCost;

   /**
    * I/O cost for the scan. Considered only if the group scan property is
    * set to {@link GroupScanProperty#ESTIMATED_TOTAL_COST}. Drill does not
    * differentiate between network and disk I/O, despite the field name.
    */
   @JsonProperty
   private final double diskCost;

   /**
    * Creates a scan cost estimate. Also serves as the Jackson creator, so
    * the JSON property names must match the names given in the annotations.
    * The arguments are stored as-is; no validation is performed.
    *
    * @param groupScanProperty how the counts and costs are to be interpreted
    * @param recordCount number of records for the scan
    * @param cpuCost CPU cost of the scan
    * @param diskCost I/O cost of the scan
    */
   @JsonCreator
   public ScanStats(@JsonProperty("groupScanProperty") GroupScanProperty groupScanProperty,
                    @JsonProperty("recordCount") double recordCount,
                    @JsonProperty("cpuCost") double cpuCost,
                    @JsonProperty("diskCost") double diskCost) {
     this.groupScanProperty = groupScanProperty;
     this.recordCount = recordCount;
     this.cpuCost = cpuCost;
     this.diskCost = diskCost;
   }

   /**
    * Return whether GroupScan knows the exact row count in the result of getSize()
    * call. By default, group scan does not know the exact row count before it
    * scans every row. Currently, Parquet group scan will return the exact row
    * count.
    *
    * @return group scan property
    */
   public GroupScanProperty getGroupScanProperty() {
     return groupScanProperty;
   }

   /**
    * @return the record count given at construction
    */
   public double getRecordCount() {
     return recordCount;
   }

   /**
    * @return the CPU cost given at construction
    */
   public double getCpuCost() {
     return cpuCost;
   }

   /**
    * @return the I/O cost given at construction
    */
   public double getDiskCost() {
     return diskCost;
   }

   /**
    * @return a single-line rendering listing the record count, CPU cost,
    *         disk cost and group scan property, in that order
    */
   @Override
   public String toString() {
     return "ScanStats{" +
       "recordCount=" + recordCount +
       ", cpuCost=" + cpuCost +
       ", diskCost=" + diskCost +
       ", groupScanProperty=" + groupScanProperty +
       '}';
   }

   /**
    * Describes which parts of a {@link ScanStats} are exact and whether the
    * full cost (rather than only the row count) is to be considered.
    */
   public enum GroupScanProperty {
     /** Neither the row count nor the column value count is exact. */
     NO_EXACT_ROW_COUNT(false, false),

     /** Both the row count and the column value count are exact. */
     EXACT_ROW_COUNT(true, true),

     /**
      * Tells the planner to consider the full cost represented
      * here. Else, the planner only looks at row count. However,
      * we don't know the actual row count, a COUNT(*) query must
      * still look at the input source if it wants an accurate count.
      */
     // NOTE(review): this constant reports an exact column value count
     // (second argument is true) although its row count is not exact.
     // Confirm that this is intended.
     ESTIMATED_TOTAL_COST(false, true);

     // Enum constants are shared singletons, so their state is immutable.
     private final boolean hasExactRowCount;
     private final boolean hasExactColumnValueCount;

     GroupScanProperty(boolean hasExactRowCount, boolean hasExactColumnValueCount) {
       this.hasExactRowCount = hasExactRowCount;
       this.hasExactColumnValueCount = hasExactColumnValueCount;
     }

     /**
      * @return {@code true} if the row count is exact
      */
     public boolean hasExactRowCount() {
       return hasExactRowCount;
     }

     /**
      * @return {@code true} if the column value count is exact
      */
     public boolean hasExactColumnValueCount() {
       return hasExactColumnValueCount;
     }

     /**
      * @return {@code true} only for {@link #ESTIMATED_TOTAL_COST}
      */
     public boolean hasFullCost() {
       return this == ESTIMATED_TOTAL_COST;
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.drill.exec.physical.base;

	import com.fasterxml.jackson.annotation.JsonCreator;
	import com.fasterxml.jackson.annotation.JsonProperty;

	/**
	* Cost estimate for a scan. In general, relative costs are more important
	* than absolute costs. If a scan supports filter push-down, the cost of
	* the scan after the push-down must be less than the combined cost of
	* the scan + project before push down, else Calcite will ignore the
	* push-down. Also, the estimated row count may influence whether the
	* table can be broadcast or hash partitioned. Otherwise, Calcite has
	* no real choices based on scan cost.
	*/
	public class ScanStats {

	  /**
	   * Predefined stats: a non-exact row count of 20 with a CPU cost and a
	   * disk cost of 1 each.
	   */
	  public static final ScanStats TRIVIAL_TABLE = new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, 20, 1, 1);

	  /**
	   * Predefined stats: an exact row count of 0 with a CPU cost and a
	   * disk cost of 1 each.
	   */
	  public static final ScanStats ZERO_RECORD_TABLE = new ScanStats(GroupScanProperty.EXACT_ROW_COUNT, 0, 1, 1);

	  /** Tells how the record count and the costs below are to be interpreted. */
	  @JsonProperty
	  private final GroupScanProperty groupScanProperty;

	  /**
	   * Number of records for the scan. Whether the value is exact is
	   * reported by {@link GroupScanProperty#hasExactRowCount()}.
	   */
	  @JsonProperty
	  private final double recordCount;

	  /**
	   * CPU cost for the scan which should consider both row and column
	   * count, and the effect of filters. Considered only if the group scan property is
	   * set to {@link GroupScanProperty#ESTIMATED_TOTAL_COST}. Default
	   * CPU cost is simply row count * column count.
	   */
	  @JsonProperty
	  private final double cpuCost;

	  /**
	   * I/O cost for the scan. Considered only if the group scan property is
	   * set to {@link GroupScanProperty#ESTIMATED_TOTAL_COST}. Drill does not
	   * differentiate between network and disk I/O, despite the field name.
	   */
	  @JsonProperty
	  private final double diskCost;

	  /**
	   * Creates a scan cost estimate. Also serves as the Jackson creator, so
	   * the JSON property names must match the names given in the annotations.
	   * The arguments are stored as-is; no validation is performed.
	   *
	   * @param groupScanProperty how the counts and costs are to be interpreted
	   * @param recordCount number of records for the scan
	   * @param cpuCost CPU cost of the scan
	   * @param diskCost I/O cost of the scan
	   */
	  @JsonCreator
	  public ScanStats(@JsonProperty("groupScanProperty") GroupScanProperty groupScanProperty,
	                   @JsonProperty("recordCount") double recordCount,
	                   @JsonProperty("cpuCost") double cpuCost,
	                   @JsonProperty("diskCost") double diskCost) {
	    this.groupScanProperty = groupScanProperty;
	    this.recordCount = recordCount;
	    this.cpuCost = cpuCost;
	    this.diskCost = diskCost;
	  }

	  /**
	   * Return whether GroupScan knows the exact row count in the result of getSize()
	   * call. By default, group scan does not know the exact row count before it
	   * scans every row. Currently, Parquet group scan will return the exact row
	   * count.
	   *
	   * @return group scan property
	   */
	  public GroupScanProperty getGroupScanProperty() {
	    return groupScanProperty;
	  }

	  /**
	   * @return the record count given at construction
	   */
	  public double getRecordCount() {
	    return recordCount;
	  }

	  /**
	   * @return the CPU cost given at construction
	   */
	  public double getCpuCost() {
	    return cpuCost;
	  }

	  /**
	   * @return the I/O cost given at construction
	   */
	  public double getDiskCost() {
	    return diskCost;
	  }

	  /**
	   * @return a single-line rendering listing the record count, CPU cost,
	   *         disk cost and group scan property, in that order
	   */
	  @Override
	  public String toString() {
	    return "ScanStats{" +
	      "recordCount=" + recordCount +
	      ", cpuCost=" + cpuCost +
	      ", diskCost=" + diskCost +
	      ", groupScanProperty=" + groupScanProperty +
	      '}';
	  }

	  /**
	   * Describes which parts of a {@link ScanStats} are exact and whether the
	   * full cost (rather than only the row count) is to be considered.
	   */
	  public enum GroupScanProperty {
	    /** Neither the row count nor the column value count is exact. */
	    NO_EXACT_ROW_COUNT(false, false),

	    /** Both the row count and the column value count are exact. */
	    EXACT_ROW_COUNT(true, true),

	    /**
	     * Tells the planner to consider the full cost represented
	     * here. Else, the planner only looks at row count. However,
	     * we don't know the actual row count, a COUNT(*) query must
	     * still look at the input source if it wants an accurate count.
	     */
	    // NOTE(review): this constant reports an exact column value count
	    // (second argument is true) although its row count is not exact.
	    // Confirm that this is intended.
	    ESTIMATED_TOTAL_COST(false, true);

	    // Enum constants are shared singletons, so their state is immutable.
	    private final boolean hasExactRowCount;
	    private final boolean hasExactColumnValueCount;

	    GroupScanProperty(boolean hasExactRowCount, boolean hasExactColumnValueCount) {
	      this.hasExactRowCount = hasExactRowCount;
	      this.hasExactColumnValueCount = hasExactColumnValueCount;
	    }

	    /**
	     * @return {@code true} if the row count is exact
	     */
	    public boolean hasExactRowCount() {
	      return hasExactRowCount;
	    }

	    /**
	     * @return {@code true} if the column value count is exact
	     */
	    public boolean hasExactColumnValueCount() {
	      return hasExactColumnValueCount;
	    }

	    /**
	     * @return {@code true} only for {@link #ESTIMATED_TOTAL_COST}
	     */
	    public boolean hasFullCost() {
	      return this == ESTIMATED_TOTAL_COST;
	    }
	  }
	}