blob: d80b2fe4fb3f2ad7ffc6e40121385940782549f9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.record.metadata;
import java.time.format.DateTimeFormatter;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.vector.accessor.ColumnWriter;
/**
* Metadata description of a column including names, types and structure
* information.
*/
public interface ColumnMetadata extends Propertied {
/**
* Predicted number of elements per array entry. Default is
* taken from the often hard-coded value of 10.
*/
String EXPECTED_CARDINALITY_PROP = DRILL_PROP_PREFIX + "cardinality";
/**
* Default value represented as a string.
*/
String DEFAULT_VALUE_PROP = DRILL_PROP_PREFIX + "default";
/**
* Expected (average) width for variable-width columns.
*/
String EXPECTED_WIDTH_PROP = DRILL_PROP_PREFIX + "width";
/**
* Optional format to use when converting to/from string values.
*/
String FORMAT_PROP = DRILL_PROP_PREFIX + "format";
/**
* Indicates how to handle blanks. Must be one of the valid values defined
* in AbstractConvertFromString. Normally set on the converter by the plugin
* rather than by the user in the schema.
*/
String BLANK_AS_PROP = DRILL_PROP_PREFIX + "blank-as";
/**
* Value for {@link #BLANK_AS_PROP}: convert blanks to null values
* (if the column is nullable), or fill with the default value
* (if the column is non-nullable).
*/
String BLANK_AS_NULL = "null";
/**
* Value for {@link #BLANK_AS_PROP}: convert blanks for numeric fields
* to 0. For non-numeric fields, convert to null (for nullable) or the
* default value (for non-nullable). Works best if non-numeric fields
* are declared as nullable.
*/
String BLANK_AS_ZERO = "0";
/**
* Indicates whether to project the column in a wildcard (*) query.
* Special columns may be excluded from projection. Certain "special"
* columns may be available only when explicitly requested. For example,
* the log reader has a "_raw" column which includes the entire input
* line before parsing. This column can be requested explicitly:<br>
* {@code SELECT foo, bar, _raw FROM ...}<br>
* but the column will <i>not</i> be included when using the wildcard:<br>
* {@code SELECT * FROM ...}
* <p>
* Marking a column (either in the provided schema or the reader schema)
* will prevent that column from appearing in a wildcard expansion.
*/
String EXCLUDE_FROM_WILDCARD = DRILL_PROP_PREFIX + "special";
/**
* Default array size, 10.
* <p>
* NOTE(review): presumably the default expected element count for array
* columns (see {@link #EXPECTED_CARDINALITY_PROP} and
* {@link #setExpectedElementCount(int)}, which both mention a default
* of 10) -- confirm against the implementations.
*/
int DEFAULT_ARRAY_SIZE = 10;
/**
* Indicates that a provided schema column is an implicit column
* (one defined by Drill rather than the reader.) Allows the implicit
* schema to reify partition names, say, as reader-specific names.
* For example, {@code dir0} might be reified as {@code year}, etc.
* <p>
* Available when the underlying reader supports implicit columns.
* The value is the defined implicit column name (not the name
* set via system/session options.) Using the defined name makes
* the provided schema immune from runtime changes to column names.
* <p>
* As the result of adding this feature, any column <i>not</i>
* tagged as implicit is a reader column, even if that column
* happens to have the same (currently selected runtime) name
* as an implicit column.
*/
String IMPLICIT_COL_TYPE = DRILL_PROP_PREFIX + "implicit";
/**
* Fully-qualified name implicit column type.
*/
String IMPLICIT_FQN = "fqn";
/**
* File path implicit column type.
*/
String IMPLICIT_FILEPATH = "filepath";
/**
* File name implicit column type.
*/
String IMPLICIT_FILENAME = "filename";
/**
* File suffix implicit column type.
*/
String IMPLICIT_SUFFIX = "suffix";
/**
* Prefix for partition directories. dir0 is the table root
* folder, dir1 the first subdirectory, and so on. Directories that
* don't exist in the actual file path take a {@code NULL} value.
*/
String IMPLICIT_PARTITION_PREFIX = "dir";
/**
* Rough characterization of Drill types into metadata categories.
* Various aspects of Drill's type system are very, very messy.
* However, Drill is defined by its code, not some abstract design,
* so the metadata system here does the best job it can to simplify
* the messy type system while staying close to the underlying
* implementation.
*/
enum StructureType {
/**
* Primitive column (all types except List, Map and Union.)
* Includes (one-dimensional) arrays of those types.
*/
PRIMITIVE,
/**
* Map or repeated map. Also describes the row as a whole.
*/
TUPLE,
/**
* Union or (non-repeated) list. (A non-repeated list is,
* essentially, a repeated union.)
*/
VARIANT,
/**
* A repeated list. A repeated list is not simply the repeated
* form of a list, it is something else entirely. It acts as
* a dimensional wrapper around any other type (except list)
* and adds a non-nullable extra dimension. Hence, this type is
* for 2D+ arrays.
* <p>
* In theory, a 2D list of, say, INT would be an INT column, but
* repeated in two dimensions. Alas, that is not how it is. Also,
* if we have a separate category for 2D lists, we should have
* a separate category for 1D lists. But, again, that is not how
* the code has evolved.
*/
MULTI_ARRAY,
/**
* Dict or repeated dict.
*/
DICT,
/**
* Unknown, specified at runtime. (Only for logical columns,
* not for physical columns.)
*/
DYNAMIC
}
/**
* Returns the structural category of this column.
*
* @return the {@link StructureType} that characterizes this column
*/
StructureType structureType();
/**
* Schema for {@code TUPLE} columns.
*
* @return the tuple schema
*/
TupleMetadata tupleSchema();
/**
* Schema for {@code VARIANT} columns.
*
* @return the variant schema
*/
VariantMetadata variantSchema();
/**
* Schema of inner dimension for <code>MULTI_ARRAY</code> columns.
* If an array is 3D, the outer column represents all 3 dimensions.
* {@code outer.childSchema()} gives another {@code MULTI_ARRAY}
* for the inner 2D array.
* {@code outer.childSchema().childSchema()} gives a column
* of some other type (but repeated) for the 1D array.
* <p>
* Sorry for the mess, but it is how the code works and we are not
* in a position to revisit data type fundamentals.
*
* @return the description of the (n-1)st dimension.
*/
ColumnMetadata childSchema();
/**
* Returns this column described as a {@link MaterializedField}.
* <p>
* NOTE(review): presumably includes any child fields, in contrast to
* {@link #emptySchema()} -- confirm against the implementations.
*
* @return the {@link MaterializedField} form of this column
*/
MaterializedField schema();
/**
* Returns a {@link MaterializedField} for this column.
* <p>
* NOTE(review): the name suggests the field is returned without
* children (compare {@link #cloneEmpty()}) -- confirm against the
* implementations.
*
* @return the {@link MaterializedField} form of this column
*/
MaterializedField emptySchema();
/**
* Returns the column name.
*
* @return the name of this column
*/
String name();
/**
* Returns the column type as a {@link MinorType}.
*
* @return the minor type of this column
*/
MinorType type();
/**
* Returns the column type as a {@link MajorType}.
*
* @return the major type of this column
*/
MajorType majorType();
/**
* Returns the column's {@link DataMode}.
*
* @return the data mode of this column
*/
DataMode mode();
/**
* Returns the dimension count of this column.
* <p>
* NOTE(review): presumably the number of array dimensions (see
* {@link StructureType#MULTI_ARRAY}); the value reported for
* non-array columns is not visible from here -- confirm.
*
* @return the number of dimensions
*/
int dimensions();
/**
* Reports whether the column is nullable.
*
* @return {@code true} if the column is nullable
*/
boolean isNullable();
/**
* Reports whether the column is an array.
*
* @return {@code true} if the column is an array
*/
boolean isArray();
/**
* Reports whether the column is variable-width. See
* {@link #setExpectedWidth(int)} for how the expected width of
* variable-width columns is used.
*
* @return {@code true} if the column is variable-width
*/
boolean isVariableWidth();
/**
* Reports whether the column is a map.
* <p>
* NOTE(review): presumably corresponds to
* {@link StructureType#TUPLE} -- confirm.
*
* @return {@code true} if the column is a map
*/
boolean isMap();
/**
* Reports whether the column is a variant.
* <p>
* NOTE(review): presumably corresponds to
* {@link StructureType#VARIANT} -- confirm.
*
* @return {@code true} if the column is a variant
*/
boolean isVariant();
/**
* Reports whether the column is a dict.
* <p>
* NOTE(review): presumably corresponds to
* {@link StructureType#DICT} -- confirm.
*
* @return {@code true} if the column is a dict
*/
boolean isDict();
/**
* Reports whether the column is a scalar.
* <p>
* NOTE(review): whether arrays of primitives count as scalar is not
* visible from here -- confirm against the implementations.
*
* @return {@code true} if the column is a scalar
*/
boolean isScalar();
/**
* Reports if the column is dynamic. A dynamic column is one with
* a "type to be named later." It is valid for describing a dynamic
* schema, but not for creating vectors; to create a vector the
* column must be resolved to a concrete type. The context should
* make it clear if any columns can be dynamic.
* @return {@code true} if the column does not yet have a concrete
* type, {@code false} if the column type is concrete
*/
boolean isDynamic();
/**
* Determine if the schema represents a column with a LIST type with
* UNION elements. (Lists can be of a single
* type (with nullable elements) or can be of unions.)
*
* @return true if the column is of type LIST of UNIONs
*/
boolean isMultiList();
/**
* Report whether one column is equivalent to another. Columns are equivalent
* if they have the same name, type and structure (ignoring internal structure
* such as properties.)
*
* @param other the column to compare with this one
* @return {@code true} if the two columns are equivalent
*/
boolean isEquivalent(ColumnMetadata other);
/**
* For variable-width columns, specify the expected column width to be used
* when allocating a new vector. Does nothing for fixed-width columns.
*
* @param width the expected column width
*/
void setExpectedWidth(int width);
/**
* Get the expected width for a column. This is the actual width for fixed-
* width columns, the specified width (defaulting to 50) for variable-width
* columns.
* @return the expected column width of each data value. Does not include
* "overhead" space such as for the null-value vector or offset vector
*/
int expectedWidth();
/**
* For an array column, specify the expected average array cardinality.
* Ignored for non-array columns. Used when allocating new vectors.
*
* @param childCount the expected average array cardinality. Defaults to
* 1 for non-array columns, 10 for array columns
*/
void setExpectedElementCount(int childCount);
/**
* Returns the expected array cardinality for array columns, or 1 for
* non-array columns.
*
* @return the expected value cardinality per value (per-row for top-level
* columns, per array element for arrays within lists)
*/
int expectedElementCount();
/**
* Sets the format to use when converting to/from string values.
* <p>
* NOTE(review): presumably stored under {@link #FORMAT_PROP} -- confirm.
*
* @param value the format string
*/
void setFormat(String value);
/**
* Returns the format to use when converting to/from string values.
* <p>
* NOTE(review): presumably read from {@link #FORMAT_PROP}; the value
* returned when no format is set is not visible from here -- confirm.
*
* @return the format string
*/
String format();
/**
* Returns the formatter to use for date/time values. Only valid for
* date/time columns.
*
* @return the formatter to use for date/time values
*/
DateTimeFormatter dateTimeFormatter();
/**
* Sets the default value property using the string-encoded form of the value.
* The default value is used for filling a vector when no real data is available.
*
* @param value the default value in String representation
*/
void setDefaultValue(String value);
/**
* Returns the default value for this column in String literal representation.
*
* @return the default value in String literal representation, or null if no
* default value has been set
*/
String defaultValue();
/**
* Returns the default value decoded into object form. This is the same as:
* <pre><code>decodeValue(defaultValue());
* </code></pre>
* NOTE(review): no {@code decodeValue} method is declared in this
* interface; {@link #valueFromString(String)} looks like the intended
* call -- confirm.
*
* @return the default value decoded as an object that can be passed to
* the {@link ColumnWriter#setObject} method.
*/
Object decodeDefaultValue();
/**
* Converts a value of this column to its string representation.
* <p>
* NOTE(review): presumably the inverse of
* {@link #valueFromString(String)} -- confirm.
*
* @param value the value to convert
* @return the string representation of the value
*/
String valueToString(Object value);
/**
* Converts a string representation into a value object for this column.
* <p>
* NOTE(review): presumably the inverse of
* {@link #valueToString(Object)} -- confirm.
*
* @param value the string representation of the value
* @return the value in object form
*/
Object valueFromString(String value);
/**
* Create an empty version of this column. If the column is a scalar,
* produces a simple copy. If a map, produces a clone without child
* columns.
*
* @return empty clone of this column
*/
ColumnMetadata cloneEmpty();
/**
* Returns the precision of this column.
* <p>
* NOTE(review): which types carry a meaningful precision, and the value
* returned otherwise, are not visible from here -- confirm.
*
* @return the precision
*/
int precision();
/**
* Returns the scale of this column.
* <p>
* NOTE(review): which types carry a meaningful scale, and the value
* returned otherwise, are not visible from here -- confirm.
*
* @return the scale
*/
int scale();
/**
* Binds this column to a parent tuple.
* <p>
* NOTE(review): semantics inferred from the parameter name only --
* confirm against the implementations.
*
* @param parentTuple the tuple to bind this column to
*/
void bind(TupleMetadata parentTuple);
/**
* Returns a copy of this column metadata.
* <p>
* NOTE(review): presumably a full copy including child columns, in
* contrast to {@link #cloneEmpty()} -- confirm.
*
* @return a copy of this column
*/
ColumnMetadata copy();
/**
* Converts type metadata into string representation
* accepted by the table schema parser.
*
* @return type metadata string representation
*/
String typeString();
/**
* Converts column metadata into string representation
* accepted by the table schema parser.
*
* @return column metadata string representation
*/
String columnString();
}