blob: 933aa6647d4b4d3a5189dde7e245e4df6849c10b [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.kudu;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.apache.kudu.Common.EncodingType;
import org.apache.kudu.Compression.CompressionType;
import org.apache.kudu.util.CharUtil;
/**
* Represents a Kudu Table column. Use {@link ColumnSchema.ColumnSchemaBuilder} in order to
* create columns.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class ColumnSchema {
private final String name;
private final Type type;
private final boolean key;
private final boolean nullable;
private final Object defaultValue;
private final int desiredBlockSize;
private final Encoding encoding;
private final CompressionAlgorithm compressionAlgorithm;
private final ColumnTypeAttributes typeAttributes;
private final int typeSize;
private final Common.DataType wireType;
private final String comment;
/**
* Specifies the encoding of data for a column on disk.
* Not all encodings are available for all data types.
* Refer to the Kudu documentation for more information on each encoding.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public enum Encoding {
UNKNOWN(EncodingType.UNKNOWN_ENCODING),
AUTO_ENCODING(EncodingType.AUTO_ENCODING),
PLAIN_ENCODING(EncodingType.PLAIN_ENCODING),
PREFIX_ENCODING(EncodingType.PREFIX_ENCODING),
GROUP_VARINT(EncodingType.GROUP_VARINT),
RLE(EncodingType.RLE),
DICT_ENCODING(EncodingType.DICT_ENCODING),
BIT_SHUFFLE(EncodingType.BIT_SHUFFLE);
final EncodingType internalPbType;
Encoding(EncodingType internalPbType) {
this.internalPbType = internalPbType;
}
@InterfaceAudience.Private
public EncodingType getInternalPbType() {
return internalPbType;
}
}
/**
* Specifies the compression algorithm of data for a column on disk.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public enum CompressionAlgorithm {
UNKNOWN(CompressionType.UNKNOWN_COMPRESSION),
DEFAULT_COMPRESSION(CompressionType.DEFAULT_COMPRESSION),
NO_COMPRESSION(CompressionType.NO_COMPRESSION),
SNAPPY(CompressionType.SNAPPY),
LZ4(CompressionType.LZ4),
ZLIB(CompressionType.ZLIB);
final CompressionType internalPbType;
CompressionAlgorithm(CompressionType internalPbType) {
this.internalPbType = internalPbType;
}
@InterfaceAudience.Private
public CompressionType getInternalPbType() {
return internalPbType;
}
}
private ColumnSchema(String name, Type type, boolean key, boolean nullable,
Object defaultValue, int desiredBlockSize, Encoding encoding,
CompressionAlgorithm compressionAlgorithm,
ColumnTypeAttributes typeAttributes, Common.DataType wireType,
String comment) {
this.name = name;
this.type = type;
this.key = key;
this.nullable = nullable;
this.defaultValue = defaultValue;
this.desiredBlockSize = desiredBlockSize;
this.encoding = encoding;
this.compressionAlgorithm = compressionAlgorithm;
this.typeAttributes = typeAttributes;
this.typeSize = type.getSize(typeAttributes);
this.wireType = wireType;
this.comment = comment;
}
/**
* Get the column's Type
* @return the type
*/
public Type getType() {
return type;
}
/**
* Get the column's name
* @return A string representation of the name
*/
public String getName() {
return name;
}
/**
* Answers if the column part of the key
* @return true if the column is part of the key, else false
*/
public boolean isKey() {
return key;
}
/**
* Answers if the column can be set to null
* @return true if it can be set to null, else false
*/
public boolean isNullable() {
return nullable;
}
/**
* The Java object representation of the default value that's read
* @return the default read value
*/
public Object getDefaultValue() {
return defaultValue;
}
/**
* Gets the desired block size for this column.
* If no block size has been explicitly specified for this column,
* returns 0 to indicate that the server-side default will be used.
*
* @return the block size, in bytes, or 0 if none has been configured.
*/
public int getDesiredBlockSize() {
return desiredBlockSize;
}
/**
* Return the encoding of this column, or null if it is not known.
*/
public Encoding getEncoding() {
return encoding;
}
/**
* Return the compression algorithm of this column, or null if it is not known.
*/
public CompressionAlgorithm getCompressionAlgorithm() {
return compressionAlgorithm;
}
/**
* Return the column type attributes for the column, or null if it is not known.
*/
public ColumnTypeAttributes getTypeAttributes() {
return typeAttributes;
}
/**
* Get the column's underlying DataType.
*/
@InterfaceAudience.Private
public Common.DataType getWireType() {
return wireType;
}
/**
* The size of this type in bytes on the wire.
* @return A size
*/
public int getTypeSize() {
return typeSize;
}
/**
* Return the comment for the column. An empty string means there is no comment.
*/
public String getComment() {
return comment;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof ColumnSchema)) {
return false;
}
ColumnSchema that = (ColumnSchema) o;
return Objects.equals(name, that.name) &&
Objects.equals(type, that.type) &&
Objects.equals(key, that.key) &&
Objects.equals(typeAttributes, that.typeAttributes) &&
Objects.equals(comment, that.comment);
}
@Override
public int hashCode() {
return Objects.hash(name, type, key, typeAttributes, comment);
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Column name: ");
sb.append(name);
sb.append(", type: ");
sb.append(type.getName());
if (typeAttributes != null) {
sb.append(typeAttributes.toStringForType(type));
}
if (!comment.isEmpty()) {
sb.append(", comment: ");
sb.append(comment);
}
return sb.toString();
}
/**
* Builder for ColumnSchema.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public static class ColumnSchemaBuilder {
private static final List<Type> TYPES_WITH_ATTRIBUTES = Arrays.asList(Type.DECIMAL,
Type.VARCHAR);
private final String name;
private final Type type;
private boolean key = false;
private boolean nullable = false;
private Object defaultValue = null;
private int desiredBlockSize = 0;
private Encoding encoding = null;
private CompressionAlgorithm compressionAlgorithm = null;
private ColumnTypeAttributes typeAttributes = null;
private Common.DataType wireType = null;
private String comment = "";
/**
* Constructor for the required parameters.
* @param name column's name
* @param type column's type
*/
public ColumnSchemaBuilder(String name, Type type) {
this.name = name;
this.type = type;
}
/**
* Constructor to copy an existing columnSchema
* @param that the columnSchema to copy
*/
public ColumnSchemaBuilder(ColumnSchema that) {
this.name = that.name;
this.type = that.type;
this.key = that.key;
this.nullable = that.nullable;
this.defaultValue = that.defaultValue;
this.desiredBlockSize = that.desiredBlockSize;
this.encoding = that.encoding;
this.compressionAlgorithm = that.compressionAlgorithm;
this.typeAttributes = that.typeAttributes;
this.wireType = that.wireType;
this.comment = that.comment;
}
/**
* Sets if the column is part of the row key. False by default.
* @param key a boolean that indicates if the column is part of the key
* @return this instance
*/
public ColumnSchemaBuilder key(boolean key) {
this.key = key;
return this;
}
/**
* Marks the column as allowing null values. False by default.
* <p>
* <strong>NOTE:</strong> the "not-nullable-by-default" behavior here differs from
* the equivalent API in the Python and C++ clients. It also differs from the
* standard behavior of SQL <code>CREATE TABLE</code> statements. It is
* recommended to always specify nullability explicitly using this API
* in order to avoid confusion.
*
* @param nullable a boolean that indicates if the column allows null values
* @return this instance
*/
public ColumnSchemaBuilder nullable(boolean nullable) {
this.nullable = nullable;
return this;
}
/**
* Sets the default value that will be read from the column. Null by default.
* @param defaultValue a Java object representation of the default value that's read
* @return this instance
*/
public ColumnSchemaBuilder defaultValue(Object defaultValue) {
this.defaultValue = defaultValue;
return this;
}
/**
* Set the desired block size for this column.
*
* This is the number of bytes of user data packed per block on disk, and
* represents the unit of IO when reading this column. Larger values
* may improve scan performance, particularly on spinning media. Smaller
* values may improve random access performance, particularly for workloads
* that have high cache hit rates or operate on fast storage such as SSD.
*
* Note that the block size specified here corresponds to uncompressed data.
* The actual size of the unit read from disk may be smaller if
* compression is enabled.
*
* It's recommended that this not be set any lower than 4096 (4KB) or higher
* than 1048576 (1MB).
* @param desiredBlockSize the desired block size, in bytes
* @return this instance
* <!-- TODO(KUDU-1107): move the above info to docs -->
*/
public ColumnSchemaBuilder desiredBlockSize(int desiredBlockSize) {
this.desiredBlockSize = desiredBlockSize;
return this;
}
/**
* Set the block encoding for this column. See the documentation for the list
* of valid options.
*/
public ColumnSchemaBuilder encoding(Encoding encoding) {
this.encoding = encoding;
return this;
}
/**
* Set the compression algorithm for this column. See the documentation for the list
* of valid options.
*/
public ColumnSchemaBuilder compressionAlgorithm(CompressionAlgorithm compressionAlgorithm) {
this.compressionAlgorithm = compressionAlgorithm;
return this;
}
/**
* Set the column type attributes for this column.
*/
public ColumnSchemaBuilder typeAttributes(ColumnTypeAttributes typeAttributes) {
if (typeAttributes != null && !TYPES_WITH_ATTRIBUTES.contains(type)) {
throw new IllegalArgumentException(
"ColumnTypeAttributes are not used on " + type + " columns");
}
this.typeAttributes = typeAttributes;
return this;
}
/**
* Allows an alternate {@link Common.DataType} to override the {@link Type}
* when serializing the ColumnSchema on the wire.
* This is useful for virtual columns specified by their type such as
* {@link Common.DataType#IS_DELETED}.
*/
@InterfaceAudience.Private
public ColumnSchemaBuilder wireType(Common.DataType wireType) {
this.wireType = wireType;
return this;
}
/**
* Set the comment for this column.
*/
public ColumnSchemaBuilder comment(String comment) {
this.comment = comment;
return this;
}
/**
* Builds a {@link ColumnSchema} using the passed parameters.
* @return a new {@link ColumnSchema}
*/
public ColumnSchema build() {
// Set the wire type if it wasn't explicitly set.
if (wireType == null) {
this.wireType = type.getDataType(typeAttributes);
}
if (type == Type.VARCHAR) {
if (typeAttributes == null || !typeAttributes.hasLength() ||
typeAttributes.getLength() < CharUtil.MIN_VARCHAR_LENGTH ||
typeAttributes.getLength() > CharUtil.MAX_VARCHAR_LENGTH) {
throw new IllegalArgumentException(
String.format("VARCHAR's length must be set and between %d and %d",
CharUtil.MIN_VARCHAR_LENGTH, CharUtil.MAX_VARCHAR_LENGTH));
}
}
return new ColumnSchema(name, type,
key, nullable, defaultValue,
desiredBlockSize, encoding, compressionAlgorithm,
typeAttributes, wireType, comment);
}
}
}