blob: 245a965494bcf90d17f828c0ce1aa6a4f645712d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.sql;
import org.checkerframework.checker.nullness.qual.Nullable;
import java.util.Objects;
/**
* A table-valued input parameter of a table function is classified by three
* characteristics.
*
* <p>The first characteristic is semantics. The table has either
* row semantics or set semantics.
*
* <p>Row semantics means that the result of the table function is decided
* on a row-by-row basis.
*
* <p>Example of a table function with row semantics input table parameter:
* We often need to read a CSV file, generally, the first line of the file
* contains a list of column names, and subsequent lines of the file contain
* data. The data in general can be treated as a large VARCHAR.
* However, some of the fields may be numeric or datetime.
*
* <p>A table function named CSVReader is designed to read a file of
* comma-separated values and interpret this file as a table.
* The function has three parameters:
* <ul>
* <li>The first parameter, File, is the name of a file on the query author's
* system. This file must contain the comma-separated values that are to be
* converted to a table. The first line of the file contains the names of
* the resulting columns. Succeeding lines contain the data. Each line after
* the first will result in one row of output, with column names as determined
* by the first line of the input.
* <li>Floats is a descriptor area, which should provide a list of the
* column names that are to be interpreted numerically.
* These columns will be output with the data type FLOAT.
* <li>Dates is a descriptor area, which provides a list of the column
* names that are to be interpreted as datetimes.
* These columns will be output with the data type DATE.
* </ul>
*
* <p>How to use this table function in query?
*
* <p>For a csv file which contents are:
* <blockquote><pre>
* docno,name,due_date,principle,interest
* 123,Mary,01/01/2014,234.56,345.67
* 234,Edgar,01/01/2014,654.32,543.21
* </pre></blockquote>
*
* <p>The query author may write a query such as the following:
* <blockquote><pre>{@code
* SELECT *
* FROM TABLE (
* CSVreader (
* 'abc.csv',
* DESCRIPTOR ("principle", "interest")
* DESCRIPTOR ("due_date")))
* }</pre></blockquote>
*
* <table>
* <caption>Results of the query</caption>
* <tr>
* <th>docno</th>
* <th>name</th>
* <th>due_date</th>
* <th>principle</th>
* <th>interest</th>
* </tr>
* <tr>
* <th>123</th>
* <th>Mary</th>
* <th>01/01/2014</th>
* <th>234.56</th>
* <th>345.67</th>
* </tr>
* <tr>
* <th>234</th>
* <th>Edgar</th>
* <th>01/01/2014</th>
* <th>654.32</th>
* <th>543.21</th>
* </tr>
* </table>
*
* <p>Set semantics means that the outcome of the function depends on how
* the data is partitioned. Set semantics is useful to implement user-defined
* analytics like aggregation or window functions.
* They operate on an entire table or a logical partition of it.
*
* <p>Example of a table function with set semantics input table parameter:
* TopN takes an input table that has been sorted on a numeric column.
* It copies the first n rows through to the output table. Any additional
* rows are summarized in a single output row in which the sort column has
* been summed and all other columns are null.
* TopN function has two parameters:
* <ul>
* <li>The first parameter, Input, is the input table. This table has set
* semantics, meaning that the result depends on the set of data (since the
* last row is a summary row). In addition, the table is marked as PRUNE WHEN
* EMPTY, meaning that the result is necessarily empty if the input is empty.
* The query author must order this input table on a single numeric column
* (syntax below).
* <li>The second parameter, Howmany, specifies how many input rows that the
* user wants to be copied into the output table; all rows after this will
* contribute to the final summary row in the output.
* </ul>
*
* <p>How to use this table function in query?
* <table>
* <caption>Original records of table orders</caption>
* <tr>
* <th>region</th>
* <th>product</th>
* <th>sales</th>
* </tr>
* <tr>
* <th>East</th>
* <th>A</th>
* <th>1234.56</th>
* </tr>
* <tr>
* <th>East</th>
* <th>B</th>
* <th>987.65</th>
* </tr>
* <tr>
* <th>East</th>
* <th>C</th>
* <th>876.54</th>
* </tr>
* <tr>
* <th>East</th>
* <th>D</th>
* <th>765.43</th>
* </tr>
* <tr>
* <th>East</th>
* <th>E</th>
* <th>654.32</th>
* </tr>
* <tr>
* <th>West</th>
* <th>E</th>
* <th>2345.67</th>
* </tr>
* <tr>
* <th>West</th>
* <th>D</th>
* <th>2001.33</th>
* </tr>
* <tr>
* <th>West</th>
* <th>C</th>
* <th>1357.99</th>
* </tr>
* <tr>
* <th>West</th>
* <th>B</th>
* <th>975.35</th>
* </tr>
* <tr>
* <th>West</th>
* <th>A</th>
* <th>864,22</th>
* </tr>
* </table>
*
* <p>The query author may write a query such as the following:
* <blockquote><pre>{@code
* SELECT *
* FROM TABLE(
* Topn(
* TABLE orders PARTITION BY region ORDER BY sales desc,
* 3))
* }</pre></blockquote>
*
* <p>The result will be:
* <table>
* <caption>Original records of table orders</caption>
* <tr>
* <th>region</th>
* <th>product</th>
* <th>sales</th>
* </tr>
* <tr>
* <th>East</th>
* <th>A</th>
* <th>1234.56</th>
* </tr>
* <tr>
* <th>East</th>
* <th>B</th>
* <th>987.65</th>
* </tr>
* <tr>
* <th>East</th>
* <th>C</th>
* <th>876.54</th>
* </tr>
* <tr>
* <th>West</th>
* <th>E</th>
* <th>2345.67</th>
* </tr>
* <tr>
* <th>West</th>
* <th>D</th>
* <th>2001.33</th>
* </tr>
* <tr>
* <th>West</th>
* <th>C</th>
* <th>1357.99</th>
* </tr>
* </table>
*
* <p>The second characteristic of input table parameter only applies to input
* table with set semantics. It specifies whether the table function can
* generate a result row even if the input table is empty.
*
* <p>The third characteristic is whether the input table supports
* pass-through columns or not.
*/
public class TableCharacteristic {
/**
* Input table has either row semantics or set semantics.
*/
public final Semantics semantics;
/**
* If the value is true, meaning that the DBMS can prune virtual processors
* from the query plan if the input table is empty.
* If the value is false, meaning that the DBMS must actually instantiate
* a virtual processor (or more than one virtual processor in the presence
* of other input tables).
*/
public final boolean pruneIfEmpty;
/**
* If the value is true, for each input row, the table function makes the
* entire input row available in the output, qualified by a range variable
* associated with the input table. Otherwise the value is false.
*/
public final boolean passColumnsThrough;
private TableCharacteristic(
Semantics semantics,
boolean pruneIfEmpty,
boolean passColumnsThrough) {
this.semantics = semantics;
this.pruneIfEmpty = pruneIfEmpty;
this.passColumnsThrough = passColumnsThrough;
}
/** Creates a builder. */
public static TableCharacteristic.Builder builder(Semantics semantics) {
return new Builder(semantics);
}
@Override public boolean equals(@Nullable Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
TableCharacteristic that = (TableCharacteristic) o;
return pruneIfEmpty == that.pruneIfEmpty
&& passColumnsThrough == that.passColumnsThrough
&& semantics == that.semantics;
}
@Override public int hashCode() {
return Objects.hash(semantics, pruneIfEmpty, passColumnsThrough);
}
@Override public String toString() {
return "TableCharacteristic{"
+ "semantics=" + semantics
+ ", pruneIfEmpty=" + pruneIfEmpty
+ ", passColumnsThrough=" + passColumnsThrough
+ '}';
}
/**
* Input table has either row semantics or set semantics.
*/
public enum Semantics {
/**
* Row semantics means that the result of the Window TableFunction
* is decided on a row-by-row basis.
* As an extreme example, the DBMS could atomize the input table into
* individual rows, and send each single row to a different virtual
* processor.
*/
ROW,
/**
* Set semantics means that the outcome of the Window TableFunction
* depends on how the data is partitioned.
* A partition may not be split across virtual processors, nor may a
* virtual processor handle more than one partition.
*/
SET
}
/**
* Builder for {@link TableCharacteristic}.
*/
public static class Builder {
private final Semantics semantics;
private boolean pruneIfEmpty = false;
private boolean passColumnsThrough = false;
/** Creates the semantics. */
private Builder(Semantics semantics) {
if (semantics == Semantics.ROW) {
// Tables with row semantics are always effectively prune when empty.
this.pruneIfEmpty = true;
}
this.semantics = semantics;
}
/** DBMS could prune virtual processors if the input table is empty. */
public Builder pruneIfEmpty() {
this.pruneIfEmpty = true;
return this;
}
/** Input table supports pass-through columns. */
public Builder passColumnsThrough() {
this.passColumnsThrough = true;
return this;
}
public TableCharacteristic build() {
return new TableCharacteristic(semantics, pruneIfEmpty, passColumnsThrough);
}
}
}