// blob: 33bb14e9ff1fdc26efadaf747511a6bb6cb391f8 (source-viewer residue, kept as a comment so the file parses)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/**
* File format description for carbon schema file
*/
namespace java org.apache.carbondata.format
/**
* The types supported by Carbon Data.
*/
enum DataType {
  // Scalar types, numbered 0 through 6.
  STRING = 0,
  SHORT = 1,
  INT = 2,
  LONG = 3,
  DOUBLE = 4,
  DECIMAL = 5,
  TIMESTAMP = 6,

  // Nested types; their numbering restarts at 20, leaving a gap after the
  // scalar types.
  ARRAY = 20,
  STRUCT = 21,
}
/**
* Encodings supported by Carbon Data. Not all encodings are valid for all types.
* Certain Encodings can be chained.
*/
enum Encoding {
  // The column is dictionary encoded.
  DICTIONARY = 0;

  // The column is delta encoded.
  DELTA = 1;

  // The column is run-length encoded.
  RLE = 2;

  // The column is encoded using an inverted index. Can be used only together
  // with dictionary encoding.
  INVERTED_INDEX = 3;

  // The column is encoded using bit packing. Can be used only together with
  // dictionary encoding.
  BIT_PACKED = 4;

  // The column is direct-dictionary encoded.
  DIRECT_DICTIONARY = 5;
}
/**
* Description of a Column for both dimension and measure
*/
// TODO: Decide where to keep the mapping between CSV column names and carbon table column names; it should not be kept in the schema.
struct ColumnSchema {
  // Data type of this column.
  1: required DataType data_type;

  /**
   * Name of the column. For a complex data type the name follows the rule
   * grand_parent_column.parent_column.child_column.
   * For array types two columns are stored: one for the array type itself and
   * one for the primitive type, the latter named parent_column.value.
   */
  2: required string column_name;

  // Unique ID of the column. For a dimension, this is the unique ID that is
  // used in the dictionary.
  3: required string column_id;

  // Whether the column is stored in columnar format or in row format.
  4: required bool columnar;

  // Encoders that are chained together to encode the data of this column.
  5: required list<Encoding> encoders;

  // Whether the column is a dimension or a measure.
  6: required bool dimension;

  // Group ID for columns stored in row format; the columns of each group are
  // chunked together.
  7: optional i32 column_group_id;

  /**
   * Used when this column contains decimal data.
   */
  8: optional i32 scale;
  9: optional i32 precision;

  /**
   * Nested fields. Since thrift does not support nested fields, the nesting
   * is flattened to a single list by a depth-first traversal, and the
   * children count is used to reconstruct the nested relationship.
   * This field is not set when the element is a primitive type.
   */
  10: optional i32 num_child;

  /**
   * Used when this column is part of an aggregate table.
   */
  11: optional string aggregate_function;

  // Default value of the column, carried as raw bytes (the byte encoding is
  // not specified in this file).
  12: optional binary default_value;

  // String key/value properties attached to the column.
  13: optional map<string,string> columnProperties;

  /**
   * Specifies the visibility of the column; false by default.
   */
  14: optional bool invisible;

  /**
   * Column reference ID.
   */
  15: optional string columnReferenceId;
}
/**
* Description of One Schema Change, contains list of added columns and deleted columns
*/
struct SchemaEvolutionEntry {
  // Timestamp of this schema change (unit and epoch are not specified in
  // this file).
  1: required i64 time_stamp;

  // Columns added by this schema change.
  2: optional list<ColumnSchema> added;

  // Columns removed by this schema change.
  3: optional list<ColumnSchema> removed;
}
/**
* History of schema evolution
*/
struct SchemaEvolution {
  // Recorded schema changes, one entry per change.
  1: required list<SchemaEvolutionEntry> schema_evolution_history;
}
/**
* The description of table schema
*/
struct TableSchema {
  // Identifier of the table.
  1: required string table_id;

  // Columns in the table.
  2: required list<ColumnSchema> table_columns;

  // History of schema evolution of this table.
  3: required SchemaEvolution schema_evolution;

  // Table properties configured by the user.
  4: optional map<string,string> tableProperties;
}
/**
 * Groups the schema of a fact table with the schemas of its aggregate tables.
 */
struct TableInfo {
  // Schema of the fact table.
  1: required TableSchema fact_table;

  // Schemas of the aggregate tables; the field is required.
  2: required list<TableSchema> aggregate_table_list;
}