core/src/main/java/org/apache/calcite/sql/TableCharacteristic.java - calcite - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to you under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.calcite.sql;

 import org.checkerframework.checker.nullness.qual.Nullable;

 import java.util.Objects;

 /**
  * A table-valued input parameter of a table function is classified by three
  * characteristics.
  *
  * <p>The first characteristic is semantics. The table has either
  * row semantics or set semantics.
  *
  * <p>Row semantics means that the result of the table function is decided
  * on a row-by-row basis.
  *
  * <p>Example of a table function with row semantics input table parameter:
  * We often need to read a CSV file, generally, the first line of the file
  * contains a list of column names, and subsequent lines of the file contain
  * data. The data in general can be treated as a large VARCHAR.
  * However, some of the fields may be numeric or datetime.
  *
  * <p>A table function named CSVReader is designed to read a file of
  * comma-separated values and interpret this file as a table.
  * The function has three parameters:
  * <ul>
  * <li>The first parameter, File, is the name of a file on the query author's
  * system. This file must contain the comma-separated values that are to be
  * converted to a table. The first line of the file contains the names of
  * the resulting columns. Succeeding lines contain the data. Each line after
  * the first will result in one row of output, with column names as determined
  * by the first line of the input.
  * <li>Floats is a descriptor area, which should provide a list of the
  * column names that are to be interpreted numerically.
  * These columns will be output with the data type FLOAT.
  * <li>Dates is a descriptor area, which provides a list of the column
  * names that are to be interpreted as datetimes.
  * These columns will be output with the data type DATE.
  * </ul>
  *
  * <p>How to use this table function in query?
  *
  * <p>For a csv file which contents are:
  * <blockquote><pre>
  * docno,name,due_date,principle,interest
  * 123,Mary,01/01/2014,234.56,345.67
  * 234,Edgar,01/01/2014,654.32,543.21
  * </pre></blockquote>
  *
  * <p>The query author may write a query such as the following:
  * <blockquote><pre>{@code
  * SELECT *
  * FROM TABLE (
  *   CSVreader (
  *   'abc.csv',
  *   DESCRIPTOR ("principle", "interest")
  *   DESCRIPTOR ("due_date")))
  * }</pre></blockquote>
  *
  * <table>
  * <caption>Results of the query</caption>
  * <tr>
  * <th>docno</th>
  * <th>name</th>
  * <th>due_date</th>
  * <th>principle</th>
  * <th>interest</th>
  * </tr>
  * <tr>
  * <th>123</th>
  * <th>Mary</th>
  * <th>01/01/2014</th>
  * <th>234.56</th>
  * <th>345.67</th>
  * </tr>
  * <tr>
  * <th>234</th>
  * <th>Edgar</th>
  * <th>01/01/2014</th>
  * <th>654.32</th>
  * <th>543.21</th>
  * </tr>
  * </table>
  *
  * <p>Set semantics means that the outcome of the function depends on how
  * the data is partitioned. Set semantics is useful to implement user-defined
  * analytics like aggregation or window functions.
  * They operate on an entire table or a logical partition of it.
  *
  * <p>Example of a table function with set semantics input table parameter:
  * TopN takes an input table that has been sorted on a numeric column.
  * It copies the first n rows through to the output table. Any additional
  * rows are summarized in a single output row in which the sort column has
  * been summed and all other columns are null.
  * TopN function has two parameters:
  * <ul>
  * <li>The first parameter, Input, is the input table. This table has set
  * semantics, meaning that the result depends on the set of data (since the
  * last row is a summary row). In addition, the table is marked as PRUNE WHEN
  * EMPTY, meaning that the result is necessarily empty if the input is empty.
  * The query author must order this input table on a single numeric column
  * (syntax below).
  * <li>The second parameter, Howmany, specifies how many input rows that the
  * user wants to be copied into the output table; all rows after this will
  * contribute to the final summary row in the output.
  * </ul>
  *
  * <p>How to use this table function in query?
  * <table>
  * <caption>Original records of table orders</caption>
  * <tr>
  * <th>region</th>
  * <th>product</th>
  * <th>sales</th>
  * </tr>
  * <tr>
  * <th>East</th>
  * <th>A</th>
  * <th>1234.56</th>
  * </tr>
  * <tr>
  * <th>East</th>
  * <th>B</th>
  * <th>987.65</th>
  * </tr>
  * <tr>
  * <th>East</th>
  * <th>C</th>
  * <th>876.54</th>
  * </tr>
  * <tr>
  * <th>East</th>
  * <th>D</th>
  * <th>765.43</th>
  * </tr>
  * <tr>
  * <th>East</th>
  * <th>E</th>
  * <th>654.32</th>
  * </tr>
  * <tr>
  * <th>West</th>
  * <th>E</th>
  * <th>2345.67</th>
  * </tr>
  * <tr>
  * <th>West</th>
  * <th>D</th>
  * <th>2001.33</th>
  * </tr>
  * <tr>
  * <th>West</th>
  * <th>C</th>
  * <th>1357.99</th>
  * </tr>
  * <tr>
  * <th>West</th>
  * <th>B</th>
  * <th>975.35</th>
  * </tr>
  * <tr>
  * <th>West</th>
  * <th>A</th>
  * <th>864,22</th>
  * </tr>
  * </table>
  *
  * <p>The query author may write a query such as the following:
  * <blockquote><pre>{@code
  * SELECT *
  * FROM TABLE(
  *   Topn(
  *     TABLE orders PARTITION BY region ORDER BY sales desc,
  *     3))
  * }</pre></blockquote>
  *
  * <p>The result will be:
  * <table>
  * <caption>Original records of table orders</caption>
  * <tr>
  * <th>region</th>
  * <th>product</th>
  * <th>sales</th>
  * </tr>
  * <tr>
  * <th>East</th>
  * <th>A</th>
  * <th>1234.56</th>
  * </tr>
  * <tr>
  * <th>East</th>
  * <th>B</th>
  * <th>987.65</th>
  * </tr>
  * <tr>
  * <th>East</th>
  * <th>C</th>
  * <th>876.54</th>
  * </tr>
  * <tr>
  * <th>West</th>
  * <th>E</th>
  * <th>2345.67</th>
  * </tr>
  * <tr>
  * <th>West</th>
  * <th>D</th>
  * <th>2001.33</th>
  * </tr>
  * <tr>
  * <th>West</th>
  * <th>C</th>
  * <th>1357.99</th>
  * </tr>
  * </table>
  *
  * <p>The second characteristic of input table parameter only applies to input
  * table with set semantics. It specifies whether the table function can
  * generate a result row even if the input table is empty.
  *
  * <p>The third characteristic is whether the input table supports
  * pass-through columns or not.
  */
 public class TableCharacteristic {

   /**
    * Input table has either row semantics or set semantics.
    */
   public final Semantics semantics;

   /**
    * If the value is true, meaning that the DBMS can prune virtual processors
    * from the query plan if the input table is empty.
    * If the value is false, meaning that the DBMS must actually instantiate
    * a virtual processor (or more than one virtual processor in the presence
    * of other input tables).
    */
   public final boolean pruneIfEmpty;

   /**
    * If the value is true, for each input row, the table function makes the
    * entire input row available in the output, qualified by a range variable
    * associated with the input table. Otherwise the value is false.
    */
   public final boolean passColumnsThrough;

   private TableCharacteristic(
       Semantics semantics,
       boolean pruneIfEmpty,
       boolean passColumnsThrough) {
     this.semantics = semantics;
     this.pruneIfEmpty = pruneIfEmpty;
     this.passColumnsThrough = passColumnsThrough;
   }

   /** Creates a builder. */
   public static TableCharacteristic.Builder builder(Semantics semantics) {
     return new Builder(semantics);
   }

   @Override public boolean equals(@Nullable Object o) {
     if (this == o) {
       return true;
     }
     if (o == null || getClass() != o.getClass()) {
       return false;
     }
     TableCharacteristic that = (TableCharacteristic) o;
     return pruneIfEmpty == that.pruneIfEmpty
         && passColumnsThrough == that.passColumnsThrough
         && semantics == that.semantics;
   }

   @Override public int hashCode() {
     return Objects.hash(semantics, pruneIfEmpty, passColumnsThrough);
   }

   @Override public String toString() {
     return "TableCharacteristic{"
         + "semantics=" + semantics
         + ", pruneIfEmpty=" + pruneIfEmpty
         + ", passColumnsThrough=" + passColumnsThrough
         + '}';
   }

   /**
    * Input table has either row semantics or set semantics.
    */
   public enum Semantics {
     /**
      * Row semantics means that the result of the Window TableFunction
      * is decided on a row-by-row basis.
      * As an extreme example, the DBMS could atomize the input table into
      * individual rows, and send each single row to a different virtual
      * processor.
      */
     ROW,

     /**
      * Set semantics means that the outcome of the Window TableFunction
      * depends on how the data is partitioned.
      * A partition may not be split across virtual processors, nor may a
      * virtual processor handle more than one partition.
      */
     SET
   }

   /**
    * Builder for {@link TableCharacteristic}.
    */
   public static class Builder {
     private final Semantics semantics;
     private boolean pruneIfEmpty = false;
     private boolean passColumnsThrough = false;

     /** Creates the semantics. */
     private Builder(Semantics semantics) {
       if (semantics == Semantics.ROW) {
         // Tables with row semantics are always effectively prune when empty.
         this.pruneIfEmpty = true;
       }
       this.semantics = semantics;
     }

     /** DBMS could prune virtual processors if the input table is empty. */
     public Builder pruneIfEmpty() {
       this.pruneIfEmpty = true;
       return this;
     }

     /** Input table supports pass-through columns. */
     public Builder passColumnsThrough() {
       this.passColumnsThrough = true;
       return this;
     }

     public TableCharacteristic build() {
       return new TableCharacteristic(semantics, pruneIfEmpty, passColumnsThrough);
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to you under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.calcite.sql;

	import org.checkerframework.checker.nullness.qual.Nullable;

	import java.util.Objects;

	/**
	* A table-valued input parameter of a table function is classified by three
	* characteristics.
	*
	* <p>The first characteristic is semantics. The table has either
	* row semantics or set semantics.
	*
	* <p>Row semantics means that the result of the table function is decided
	* on a row-by-row basis.
	*
	* <p>Example of a table function with row semantics input table parameter:
	* We often need to read a CSV file, generally, the first line of the file
	* contains a list of column names, and subsequent lines of the file contain
	* data. The data in general can be treated as a large VARCHAR.
	* However, some of the fields may be numeric or datetime.
	*
	* <p>A table function named CSVReader is designed to read a file of
	* comma-separated values and interpret this file as a table.
	* The function has three parameters:
	* <ul>
	* <li>The first parameter, File, is the name of a file on the query author's
	* system. This file must contain the comma-separated values that are to be
	* converted to a table. The first line of the file contains the names of
	* the resulting columns. Succeeding lines contain the data. Each line after
	* the first will result in one row of output, with column names as determined
	* by the first line of the input.
	* <li>Floats is a descriptor area, which should provide a list of the
	* column names that are to be interpreted numerically.
	* These columns will be output with the data type FLOAT.
	* <li>Dates is a descriptor area, which provides a list of the column
	* names that are to be interpreted as datetimes.
	* These columns will be output with the data type DATE.
	* </ul>
	*
	* <p>How to use this table function in query?
	*
	* <p>For a csv file which contents are:
	* <blockquote><pre>
	* docno,name,due_date,principle,interest
	* 123,Mary,01/01/2014,234.56,345.67
	* 234,Edgar,01/01/2014,654.32,543.21
	* </pre></blockquote>
	*
	* <p>The query author may write a query such as the following:
	* <blockquote><pre>{@code
	* SELECT *
	* FROM TABLE (
	* CSVreader (
	* 'abc.csv',
	* DESCRIPTOR ("principle", "interest")
	* DESCRIPTOR ("due_date")))
	* }</pre></blockquote>
	*
	* <table>
	* <caption>Results of the query</caption>
	* <tr>
	* <th>docno</th>
	* <th>name</th>
	* <th>due_date</th>
	* <th>principle</th>
	* <th>interest</th>
	* </tr>
	* <tr>
	* <th>123</th>
	* <th>Mary</th>
	* <th>01/01/2014</th>
	* <th>234.56</th>
	* <th>345.67</th>
	* </tr>
	* <tr>
	* <th>234</th>
	* <th>Edgar</th>
	* <th>01/01/2014</th>
	* <th>654.32</th>
	* <th>543.21</th>
	* </tr>
	* </table>
	*
	* <p>Set semantics means that the outcome of the function depends on how
	* the data is partitioned. Set semantics is useful to implement user-defined
	* analytics like aggregation or window functions.
	* They operate on an entire table or a logical partition of it.
	*
	* <p>Example of a table function with set semantics input table parameter:
	* TopN takes an input table that has been sorted on a numeric column.
	* It copies the first n rows through to the output table. Any additional
	* rows are summarized in a single output row in which the sort column has
	* been summed and all other columns are null.
	* TopN function has two parameters:
	* <ul>
	* <li>The first parameter, Input, is the input table. This table has set
	* semantics, meaning that the result depends on the set of data (since the
	* last row is a summary row). In addition, the table is marked as PRUNE WHEN
	* EMPTY, meaning that the result is necessarily empty if the input is empty.
	* The query author must order this input table on a single numeric column
	* (syntax below).
	* <li>The second parameter, Howmany, specifies how many input rows that the
	* user wants to be copied into the output table; all rows after this will
	* contribute to the final summary row in the output.
	* </ul>
	*
	* <p>How to use this table function in query?
	* <table>
	* <caption>Original records of table orders</caption>
	* <tr>
	* <th>region</th>
	* <th>product</th>
	* <th>sales</th>
	* </tr>
	* <tr>
	* <th>East</th>
	* <th>A</th>
	* <th>1234.56</th>
	* </tr>
	* <tr>
	* <th>East</th>
	* <th>B</th>
	* <th>987.65</th>
	* </tr>
	* <tr>
	* <th>East</th>
	* <th>C</th>
	* <th>876.54</th>
	* </tr>
	* <tr>
	* <th>East</th>
	* <th>D</th>
	* <th>765.43</th>
	* </tr>
	* <tr>
	* <th>East</th>
	* <th>E</th>
	* <th>654.32</th>
	* </tr>
	* <tr>
	* <th>West</th>
	* <th>E</th>
	* <th>2345.67</th>
	* </tr>
	* <tr>
	* <th>West</th>
	* <th>D</th>
	* <th>2001.33</th>
	* </tr>
	* <tr>
	* <th>West</th>
	* <th>C</th>
	* <th>1357.99</th>
	* </tr>
	* <tr>
	* <th>West</th>
	* <th>B</th>
	* <th>975.35</th>
	* </tr>
	* <tr>
	* <th>West</th>
	* <th>A</th>
	* <th>864,22</th>
	* </tr>
	* </table>
	*
	* <p>The query author may write a query such as the following:
	* <blockquote><pre>{@code
	* SELECT *
	* FROM TABLE(
	* Topn(
	* TABLE orders PARTITION BY region ORDER BY sales desc,
	* 3))
	* }</pre></blockquote>
	*
	* <p>The result will be:
	* <table>
	* <caption>Original records of table orders</caption>
	* <tr>
	* <th>region</th>
	* <th>product</th>
	* <th>sales</th>
	* </tr>
	* <tr>
	* <th>East</th>
	* <th>A</th>
	* <th>1234.56</th>
	* </tr>
	* <tr>
	* <th>East</th>
	* <th>B</th>
	* <th>987.65</th>
	* </tr>
	* <tr>
	* <th>East</th>
	* <th>C</th>
	* <th>876.54</th>
	* </tr>
	* <tr>
	* <th>West</th>
	* <th>E</th>
	* <th>2345.67</th>
	* </tr>
	* <tr>
	* <th>West</th>
	* <th>D</th>
	* <th>2001.33</th>
	* </tr>
	* <tr>
	* <th>West</th>
	* <th>C</th>
	* <th>1357.99</th>
	* </tr>
	* </table>
	*
	* <p>The second characteristic of input table parameter only applies to input
	* table with set semantics. It specifies whether the table function can
	* generate a result row even if the input table is empty.
	*
	* <p>The third characteristic is whether the input table supports
	* pass-through columns or not.
	*/
	public class TableCharacteristic {

	/**
	* Input table has either row semantics or set semantics.
	*/
	public final Semantics semantics;

	/**
	* If the value is true, meaning that the DBMS can prune virtual processors
	* from the query plan if the input table is empty.
	* If the value is false, meaning that the DBMS must actually instantiate
	* a virtual processor (or more than one virtual processor in the presence
	* of other input tables).
	*/
	public final boolean pruneIfEmpty;

	/**
	* If the value is true, for each input row, the table function makes the
	* entire input row available in the output, qualified by a range variable
	* associated with the input table. Otherwise the value is false.
	*/
	public final boolean passColumnsThrough;

	private TableCharacteristic(
	Semantics semantics,
	boolean pruneIfEmpty,
	boolean passColumnsThrough) {
	this.semantics = semantics;
	this.pruneIfEmpty = pruneIfEmpty;
	this.passColumnsThrough = passColumnsThrough;
	}

	/** Creates a builder. */
	public static TableCharacteristic.Builder builder(Semantics semantics) {
	return new Builder(semantics);
	}

	@Override public boolean equals(@Nullable Object o) {
	if (this == o) {
	return true;
	}
	if (o == null \|\| getClass() != o.getClass()) {
	return false;
	}
	TableCharacteristic that = (TableCharacteristic) o;
	return pruneIfEmpty == that.pruneIfEmpty
	&& passColumnsThrough == that.passColumnsThrough
	&& semantics == that.semantics;
	}

	@Override public int hashCode() {
	return Objects.hash(semantics, pruneIfEmpty, passColumnsThrough);
	}

	@Override public String toString() {
	return "TableCharacteristic{"
	+ "semantics=" + semantics
	+ ", pruneIfEmpty=" + pruneIfEmpty
	+ ", passColumnsThrough=" + passColumnsThrough
	+ '}';
	}

	/**
	* Input table has either row semantics or set semantics.
	*/
	public enum Semantics {
	/**
	* Row semantics means that the result of the Window TableFunction
	* is decided on a row-by-row basis.
	* As an extreme example, the DBMS could atomize the input table into
	* individual rows, and send each single row to a different virtual
	* processor.
	*/
	ROW,

	/**
	* Set semantics means that the outcome of the Window TableFunction
	* depends on how the data is partitioned.
	* A partition may not be split across virtual processors, nor may a
	* virtual processor handle more than one partition.
	*/
	SET
	}

	/**
	* Builder for {@link TableCharacteristic}.
	*/
	public static class Builder {
	private final Semantics semantics;
	private boolean pruneIfEmpty = false;
	private boolean passColumnsThrough = false;

	/** Creates the semantics. */
	private Builder(Semantics semantics) {
	if (semantics == Semantics.ROW) {
	// Tables with row semantics are always effectively prune when empty.
	this.pruneIfEmpty = true;
	}
	this.semantics = semantics;
	}

	/** DBMS could prune virtual processors if the input table is empty. */
	public Builder pruneIfEmpty() {
	this.pruneIfEmpty = true;
	return this;
	}

	/** Input table supports pass-through columns. */
	public Builder passColumnsThrough() {
	this.passColumnsThrough = true;
	return this;
	}

	public TableCharacteristic build() {
	return new TableCharacteristic(semantics, pruneIfEmpty, passColumnsThrough);
	}
	}
	}