| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to you under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.calcite.sql; |
| |
| import org.checkerframework.checker.nullness.qual.Nullable; |
| |
| import java.util.Objects; |
| |
| /** |
| * A table-valued input parameter of a table function is classified by three |
| * characteristics. |
| * |
| * <p>The first characteristic is semantics. The table has either |
| * row semantics or set semantics. |
| * |
| * <p>Row semantics means that the result of the table function is decided |
| * on a row-by-row basis. |
| * |
| * <p>Example of a table function with row semantics input table parameter: |
| * We often need to read a CSV file, generally, the first line of the file |
| * contains a list of column names, and subsequent lines of the file contain |
| * data. The data in general can be treated as a large VARCHAR. |
| * However, some of the fields may be numeric or datetime. |
| * |
| * <p>A table function named CSVReader is designed to read a file of |
| * comma-separated values and interpret this file as a table. |
| * The function has three parameters: |
| * <ul> |
| * <li>The first parameter, File, is the name of a file on the query author's |
| * system. This file must contain the comma-separated values that are to be |
| * converted to a table. The first line of the file contains the names of |
| * the resulting columns. Succeeding lines contain the data. Each line after |
| * the first will result in one row of output, with column names as determined |
| * by the first line of the input. |
| * <li>Floats is a descriptor area, which should provide a list of the |
| * column names that are to be interpreted numerically. |
| * These columns will be output with the data type FLOAT. |
| * <li>Dates is a descriptor area, which provides a list of the column |
| * names that are to be interpreted as datetimes. |
| * These columns will be output with the data type DATE. |
| * </ul> |
| * |
| * <p>How to use this table function in query? |
| * |
| * <p>For a csv file which contents are: |
| * <blockquote><pre> |
| * docno,name,due_date,principle,interest |
| * 123,Mary,01/01/2014,234.56,345.67 |
| * 234,Edgar,01/01/2014,654.32,543.21 |
| * </pre></blockquote> |
| * |
| * <p>The query author may write a query such as the following: |
| * <blockquote><pre>{@code |
| * SELECT * |
| * FROM TABLE ( |
| * CSVreader ( |
| * 'abc.csv', |
| * DESCRIPTOR ("principle", "interest") |
| * DESCRIPTOR ("due_date"))) |
| * }</pre></blockquote> |
| * |
| * <table> |
| * <caption>Results of the query</caption> |
| * <tr> |
| * <th>docno</th> |
| * <th>name</th> |
| * <th>due_date</th> |
| * <th>principle</th> |
| * <th>interest</th> |
| * </tr> |
| * <tr> |
| * <th>123</th> |
| * <th>Mary</th> |
| * <th>01/01/2014</th> |
| * <th>234.56</th> |
| * <th>345.67</th> |
| * </tr> |
| * <tr> |
| * <th>234</th> |
| * <th>Edgar</th> |
| * <th>01/01/2014</th> |
| * <th>654.32</th> |
| * <th>543.21</th> |
| * </tr> |
| * </table> |
| * |
| * <p>Set semantics means that the outcome of the function depends on how |
| * the data is partitioned. Set semantics is useful to implement user-defined |
| * analytics like aggregation or window functions. |
| * They operate on an entire table or a logical partition of it. |
| * |
| * <p>Example of a table function with set semantics input table parameter: |
| * TopN takes an input table that has been sorted on a numeric column. |
| * It copies the first n rows through to the output table. Any additional |
| * rows are summarized in a single output row in which the sort column has |
| * been summed and all other columns are null. |
| * TopN function has two parameters: |
| * <ul> |
| * <li>The first parameter, Input, is the input table. This table has set |
| * semantics, meaning that the result depends on the set of data (since the |
| * last row is a summary row). In addition, the table is marked as PRUNE WHEN |
| * EMPTY, meaning that the result is necessarily empty if the input is empty. |
| * The query author must order this input table on a single numeric column |
| * (syntax below). |
| * <li>The second parameter, Howmany, specifies how many input rows that the |
| * user wants to be copied into the output table; all rows after this will |
| * contribute to the final summary row in the output. |
| * </ul> |
| * |
| * <p>How to use this table function in query? |
| * <table> |
| * <caption>Original records of table orders</caption> |
| * <tr> |
| * <th>region</th> |
| * <th>product</th> |
| * <th>sales</th> |
| * </tr> |
| * <tr> |
| * <th>East</th> |
| * <th>A</th> |
| * <th>1234.56</th> |
| * </tr> |
| * <tr> |
| * <th>East</th> |
| * <th>B</th> |
| * <th>987.65</th> |
| * </tr> |
| * <tr> |
| * <th>East</th> |
| * <th>C</th> |
| * <th>876.54</th> |
| * </tr> |
| * <tr> |
| * <th>East</th> |
| * <th>D</th> |
| * <th>765.43</th> |
| * </tr> |
| * <tr> |
| * <th>East</th> |
| * <th>E</th> |
| * <th>654.32</th> |
| * </tr> |
| * <tr> |
| * <th>West</th> |
| * <th>E</th> |
| * <th>2345.67</th> |
| * </tr> |
| * <tr> |
| * <th>West</th> |
| * <th>D</th> |
| * <th>2001.33</th> |
| * </tr> |
| * <tr> |
| * <th>West</th> |
| * <th>C</th> |
| * <th>1357.99</th> |
| * </tr> |
| * <tr> |
| * <th>West</th> |
| * <th>B</th> |
| * <th>975.35</th> |
| * </tr> |
| * <tr> |
| * <th>West</th> |
| * <th>A</th> |
| * <th>864,22</th> |
| * </tr> |
| * </table> |
| * |
| * <p>The query author may write a query such as the following: |
| * <blockquote><pre>{@code |
| * SELECT * |
| * FROM TABLE( |
| * Topn( |
| * TABLE orders PARTITION BY region ORDER BY sales desc, |
| * 3)) |
| * }</pre></blockquote> |
| * |
| * <p>The result will be: |
| * <table> |
| * <caption>Original records of table orders</caption> |
| * <tr> |
| * <th>region</th> |
| * <th>product</th> |
| * <th>sales</th> |
| * </tr> |
| * <tr> |
| * <th>East</th> |
| * <th>A</th> |
| * <th>1234.56</th> |
| * </tr> |
| * <tr> |
| * <th>East</th> |
| * <th>B</th> |
| * <th>987.65</th> |
| * </tr> |
| * <tr> |
| * <th>East</th> |
| * <th>C</th> |
| * <th>876.54</th> |
| * </tr> |
| * <tr> |
| * <th>West</th> |
| * <th>E</th> |
| * <th>2345.67</th> |
| * </tr> |
| * <tr> |
| * <th>West</th> |
| * <th>D</th> |
| * <th>2001.33</th> |
| * </tr> |
| * <tr> |
| * <th>West</th> |
| * <th>C</th> |
| * <th>1357.99</th> |
| * </tr> |
| * </table> |
| * |
| * <p>The second characteristic of input table parameter only applies to input |
| * table with set semantics. It specifies whether the table function can |
| * generate a result row even if the input table is empty. |
| * |
| * <p>The third characteristic is whether the input table supports |
| * pass-through columns or not. |
| */ |
| public class TableCharacteristic { |
| |
| /** |
| * Input table has either row semantics or set semantics. |
| */ |
| public final Semantics semantics; |
| |
| /** |
| * If the value is true, meaning that the DBMS can prune virtual processors |
| * from the query plan if the input table is empty. |
| * If the value is false, meaning that the DBMS must actually instantiate |
| * a virtual processor (or more than one virtual processor in the presence |
| * of other input tables). |
| */ |
| public final boolean pruneIfEmpty; |
| |
| /** |
| * If the value is true, for each input row, the table function makes the |
| * entire input row available in the output, qualified by a range variable |
| * associated with the input table. Otherwise the value is false. |
| */ |
| public final boolean passColumnsThrough; |
| |
| private TableCharacteristic( |
| Semantics semantics, |
| boolean pruneIfEmpty, |
| boolean passColumnsThrough) { |
| this.semantics = semantics; |
| this.pruneIfEmpty = pruneIfEmpty; |
| this.passColumnsThrough = passColumnsThrough; |
| } |
| |
| /** Creates a builder. */ |
| public static TableCharacteristic.Builder builder(Semantics semantics) { |
| return new Builder(semantics); |
| } |
| |
| @Override public boolean equals(@Nullable Object o) { |
| if (this == o) { |
| return true; |
| } |
| if (o == null || getClass() != o.getClass()) { |
| return false; |
| } |
| TableCharacteristic that = (TableCharacteristic) o; |
| return pruneIfEmpty == that.pruneIfEmpty |
| && passColumnsThrough == that.passColumnsThrough |
| && semantics == that.semantics; |
| } |
| |
| @Override public int hashCode() { |
| return Objects.hash(semantics, pruneIfEmpty, passColumnsThrough); |
| } |
| |
| @Override public String toString() { |
| return "TableCharacteristic{" |
| + "semantics=" + semantics |
| + ", pruneIfEmpty=" + pruneIfEmpty |
| + ", passColumnsThrough=" + passColumnsThrough |
| + '}'; |
| } |
| |
| /** |
| * Input table has either row semantics or set semantics. |
| */ |
| public enum Semantics { |
| /** |
| * Row semantics means that the result of the Window TableFunction |
| * is decided on a row-by-row basis. |
| * As an extreme example, the DBMS could atomize the input table into |
| * individual rows, and send each single row to a different virtual |
| * processor. |
| */ |
| ROW, |
| |
| /** |
| * Set semantics means that the outcome of the Window TableFunction |
| * depends on how the data is partitioned. |
| * A partition may not be split across virtual processors, nor may a |
| * virtual processor handle more than one partition. |
| */ |
| SET |
| } |
| |
| /** |
| * Builder for {@link TableCharacteristic}. |
| */ |
| public static class Builder { |
| private final Semantics semantics; |
| private boolean pruneIfEmpty = false; |
| private boolean passColumnsThrough = false; |
| |
| /** Creates the semantics. */ |
| private Builder(Semantics semantics) { |
| if (semantics == Semantics.ROW) { |
| // Tables with row semantics are always effectively prune when empty. |
| this.pruneIfEmpty = true; |
| } |
| this.semantics = semantics; |
| } |
| |
| /** DBMS could prune virtual processors if the input table is empty. */ |
| public Builder pruneIfEmpty() { |
| this.pruneIfEmpty = true; |
| return this; |
| } |
| |
| /** Input table supports pass-through columns. */ |
| public Builder passColumnsThrough() { |
| this.passColumnsThrough = true; |
| return this; |
| } |
| |
| public TableCharacteristic build() { |
| return new TableCharacteristic(semantics, pruneIfEmpty, passColumnsThrough); |
| } |
| } |
| } |