api/src/main/java/org/apache/iceberg/TableScan.java - iceberg - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.iceberg;

 import java.util.Collection;
 import org.apache.iceberg.expressions.Expression;
 import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;

 /**
  * API for configuring a table scan.
  * <p>
  * TableScan objects are immutable and can be shared between threads. Refinement methods, like
  * {@link #select(Collection)} and {@link #filter(Expression)}, create new TableScan instances.
  */
 public interface TableScan {
   /**
    * Returns the {@link Table} from which this scan loads data.
    *
    * @return this scan's table
    */
   Table table();

   /**
    * Create a new {@link TableScan} from this scan's configuration that will use the given snapshot
    * by ID.
    *
    * @param snapshotId a snapshot ID
    * @return a new scan based on this with the given snapshot ID
    * @throws IllegalArgumentException if the snapshot cannot be found
    */
   TableScan useSnapshot(long snapshotId);

   /**
    * Create a new {@link TableScan} from this scan's configuration that will use the most recent
    * snapshot as of the given time in milliseconds.
    *
    * @param timestampMillis a timestamp in milliseconds.
    * @return a new scan based on this with the current snapshot at the given time
    * @throws IllegalArgumentException if the snapshot cannot be found
    */
   TableScan asOfTime(long timestampMillis);

   /**
    * Create a new {@link TableScan} from this scan's configuration that will override the {@link Table}'s behavior based
    * on the incoming pair. Unknown properties will be ignored.
    *
    * @param property name of the table property to be overridden
    * @param value value to override with
    * @return a new scan based on this with overridden behavior
    */
   TableScan option(String property, String value);

   /**
    * Create a new {@link TableScan} from this with the schema as its projection.
    *
    * @param schema a projection schema
    * @return a new scan based on this with the given projection
    */
   TableScan project(Schema schema);

   /**
    * Create a new {@link TableScan} from this that, if data columns where selected
    * via {@link #select(java.util.Collection)}, controls whether the match to the schema will be done
    * with case sensitivity.
    *
    * @return a new scan based on this with case sensitivity as stated
    */
   TableScan caseSensitive(boolean caseSensitive);

   /**
    * Create a new {@link TableScan} from this that loads the column stats with each data file.
    * <p>
    * Column stats include: value count, null value count, lower bounds, and upper bounds.
    *
    * @return a new scan based on this that loads column stats.
    */
   TableScan includeColumnStats();

   /**
    * Create a new {@link TableScan} from this that will read the given data columns. This produces
    * an expected schema that includes all fields that are either selected or used by this scan's
    * filter expression.
    *
    * @param columns column names from the table's schema
    * @return a new scan based on this with the given projection columns
    */
   default TableScan select(String... columns) {
     return select(Lists.newArrayList(columns));
   }

   /**
    * Create a new {@link TableScan} from this that will read the given data columns. This produces
    * an expected schema that includes all fields that are either selected or used by this scan's
    * filter expression.
    *
    * @param columns column names from the table's schema
    * @return a new scan based on this with the given projection columns
    */
   TableScan select(Collection<String> columns);

   /**
    * Create a new {@link TableScan} from the results of this filtered by the {@link Expression}.
    *
    * @param expr a filter expression
    * @return a new scan based on this with results filtered by the expression
    */
   TableScan filter(Expression expr);

   /**
    * Returns this scan's filter {@link Expression}.
    *
    * @return this scan's filter expression
    */
   Expression filter();

   /**
    * Create a new {@link TableScan} from this that applies data filtering to files but not to rows in those files.
    *
    * @return a new scan based on this that does not filter rows in files.
    */
   TableScan ignoreResiduals();

   /**
    * Create a new {@link TableScan} to read appended data from {@code fromSnapshotId} exclusive to {@code toSnapshotId}
    * inclusive.
    *
    * @param fromSnapshotId the last snapshot id read by the user, exclusive
    * @param toSnapshotId read append data up to this snapshot id
    * @return a table scan which can read append data from {@code fromSnapshotId}
    * exclusive and up to {@code toSnapshotId} inclusive
    */
   TableScan appendsBetween(long fromSnapshotId, long toSnapshotId);

   /**
    * Create a new {@link TableScan} to read appended data from {@code fromSnapshotId} exclusive to the current snapshot
    * inclusive.
    *
    * @param fromSnapshotId - the last snapshot id read by the user, exclusive
    * @return a table scan which can read append data from {@code fromSnapshotId}
    * exclusive and up to current snapshot inclusive
    */
   TableScan appendsAfter(long fromSnapshotId);

   /**
    * Plan the {@link FileScanTask files} that will be read by this scan.
    * <p>
    * Each file has a residual expression that should be applied to filter the file's rows.
    * <p>
    * This simple plan returns file scans for each file from position 0 to the file's length. For
    * planning that will combine small files, split large files, and attempt to balance work, use
    * {@link #planTasks()} instead.
    *
    * @return an Iterable of file tasks that are required by this scan
    */
   CloseableIterable<FileScanTask> planFiles();

   /**
    * Plan the {@link CombinedScanTask tasks} for this scan.
    * <p>
    * Tasks created by this method may read partial input files, multiple input files, or both.
    *
    * @return an Iterable of tasks for this scan
    */
   CloseableIterable<CombinedScanTask> planTasks();

   /**
    * Returns this scan's projection {@link Schema}.
    * <p>
    * If the projection schema was set directly using {@link #project(Schema)}, returns that schema.
    * <p>
    * If the projection schema was set by calling {@link #select(Collection)}, returns a projection
    * schema that includes the selected data fields and any fields used in the filter expression.
    *
    * @return this scan's projection schema
    */
   Schema schema();

   /**
    * Returns the {@link Snapshot} that will be used by this scan.
    * <p>
    * If the snapshot was not configured using {@link #asOfTime(long)} or {@link #useSnapshot(long)}, the current table
    * snapshot will be used.
    *
    * @return the Snapshot this scan will use
    */
   Snapshot snapshot();

   /**
    * Returns whether this scan should apply column name case sensitiveness as per {@link #caseSensitive(boolean)}.
    * @return true if case sensitive, false otherwise.
    */
   boolean isCaseSensitive();

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.iceberg;

	import java.util.Collection;
	import org.apache.iceberg.expressions.Expression;
	import org.apache.iceberg.io.CloseableIterable;
	import org.apache.iceberg.relocated.com.google.common.collect.Lists;

	/**
	* API for configuring a table scan.
	* <p>
	* TableScan objects are immutable and can be shared between threads. Refinement methods, like
	* {@link #select(Collection)} and {@link #filter(Expression)}, create new TableScan instances.
	*/
	public interface TableScan {
	/**
	* Returns the {@link Table} from which this scan loads data.
	*
	* @return this scan's table
	*/
	Table table();

	/**
	* Create a new {@link TableScan} from this scan's configuration that will use the given snapshot
	* by ID.
	*
	* @param snapshotId a snapshot ID
	* @return a new scan based on this with the given snapshot ID
	* @throws IllegalArgumentException if the snapshot cannot be found
	*/
	TableScan useSnapshot(long snapshotId);

	/**
	* Create a new {@link TableScan} from this scan's configuration that will use the most recent
	* snapshot as of the given time in milliseconds.
	*
	* @param timestampMillis a timestamp in milliseconds.
	* @return a new scan based on this with the current snapshot at the given time
	* @throws IllegalArgumentException if the snapshot cannot be found
	*/
	TableScan asOfTime(long timestampMillis);

	/**
	* Create a new {@link TableScan} from this scan's configuration that will override the {@link Table}'s behavior based
	* on the incoming pair. Unknown properties will be ignored.
	*
	* @param property name of the table property to be overridden
	* @param value value to override with
	* @return a new scan based on this with overridden behavior
	*/
	TableScan option(String property, String value);

	/**
	* Create a new {@link TableScan} from this with the schema as its projection.
	*
	* @param schema a projection schema
	* @return a new scan based on this with the given projection
	*/
	TableScan project(Schema schema);

	/**
	* Create a new {@link TableScan} from this that, if data columns where selected
	* via {@link #select(java.util.Collection)}, controls whether the match to the schema will be done
	* with case sensitivity.
	*
	* @return a new scan based on this with case sensitivity as stated
	*/
	TableScan caseSensitive(boolean caseSensitive);

	/**
	* Create a new {@link TableScan} from this that loads the column stats with each data file.
	* <p>
	* Column stats include: value count, null value count, lower bounds, and upper bounds.
	*
	* @return a new scan based on this that loads column stats.
	*/
	TableScan includeColumnStats();

	/**
	* Create a new {@link TableScan} from this that will read the given data columns. This produces
	* an expected schema that includes all fields that are either selected or used by this scan's
	* filter expression.
	*
	* @param columns column names from the table's schema
	* @return a new scan based on this with the given projection columns
	*/
	default TableScan select(String... columns) {
	return select(Lists.newArrayList(columns));
	}

	/**
	* Create a new {@link TableScan} from this that will read the given data columns. This produces
	* an expected schema that includes all fields that are either selected or used by this scan's
	* filter expression.
	*
	* @param columns column names from the table's schema
	* @return a new scan based on this with the given projection columns
	*/
	TableScan select(Collection<String> columns);

	/**
	* Create a new {@link TableScan} from the results of this filtered by the {@link Expression}.
	*
	* @param expr a filter expression
	* @return a new scan based on this with results filtered by the expression
	*/
	TableScan filter(Expression expr);

	/**
	* Returns this scan's filter {@link Expression}.
	*
	* @return this scan's filter expression
	*/
	Expression filter();

	/**
	* Create a new {@link TableScan} from this that applies data filtering to files but not to rows in those files.
	*
	* @return a new scan based on this that does not filter rows in files.
	*/
	TableScan ignoreResiduals();

	/**
	* Create a new {@link TableScan} to read appended data from {@code fromSnapshotId} exclusive to {@code toSnapshotId}
	* inclusive.
	*
	* @param fromSnapshotId the last snapshot id read by the user, exclusive
	* @param toSnapshotId read append data up to this snapshot id
	* @return a table scan which can read append data from {@code fromSnapshotId}
	* exclusive and up to {@code toSnapshotId} inclusive
	*/
	TableScan appendsBetween(long fromSnapshotId, long toSnapshotId);

	/**
	* Create a new {@link TableScan} to read appended data from {@code fromSnapshotId} exclusive to the current snapshot
	* inclusive.
	*
	* @param fromSnapshotId - the last snapshot id read by the user, exclusive
	* @return a table scan which can read append data from {@code fromSnapshotId}
	* exclusive and up to current snapshot inclusive
	*/
	TableScan appendsAfter(long fromSnapshotId);

	/**
	* Plan the {@link FileScanTask files} that will be read by this scan.
	* <p>
	* Each file has a residual expression that should be applied to filter the file's rows.
	* <p>
	* This simple plan returns file scans for each file from position 0 to the file's length. For
	* planning that will combine small files, split large files, and attempt to balance work, use
	* {@link #planTasks()} instead.
	*
	* @return an Iterable of file tasks that are required by this scan
	*/
	CloseableIterable<FileScanTask> planFiles();

	/**
	* Plan the {@link CombinedScanTask tasks} for this scan.
	* <p>
	* Tasks created by this method may read partial input files, multiple input files, or both.
	*
	* @return an Iterable of tasks for this scan
	*/
	CloseableIterable<CombinedScanTask> planTasks();

	/**
	* Returns this scan's projection {@link Schema}.
	* <p>
	* If the projection schema was set directly using {@link #project(Schema)}, returns that schema.
	* <p>
	* If the projection schema was set by calling {@link #select(Collection)}, returns a projection
	* schema that includes the selected data fields and any fields used in the filter expression.
	*
	* @return this scan's projection schema
	*/
	Schema schema();

	/**
	* Returns the {@link Snapshot} that will be used by this scan.
	* <p>
	* If the snapshot was not configured using {@link #asOfTime(long)} or {@link #useSnapshot(long)}, the current table
	* snapshot will be used.
	*
	* @return the Snapshot this scan will use
	*/
	Snapshot snapshot();

	/**
	* Returns whether this scan should apply column name case sensitiveness as per {@link #caseSensitive(boolean)}.
	* @return true if case sensitive, false otherwise.
	*/
	boolean isCaseSensitive();

	}