blob: 92e2c8e1dd4b07b85adde590c8c5597de587f164 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* MetadataInterface.h
*
* Created on: Jul 1, 2013
* Author: malili
*/
#ifndef METADATAINTERFACE_H_
#define METADATAINTERFACE_H_
#include <stdint.h>
/*
 * Compression codec identifiers; used as the type of
 * ColumnChunkMetadata_4C.codec.
 * The numeric values are the same 0-based positions the enumerators had
 * implicitly; they are spelled out so that an accidental reordering is
 * visible in review.
 */
typedef enum CompressionCodecName {
    UNCOMPRESSED = 0,
    SNAPPY = 1,
    GZIP = 2,
    LZO = 3
} CompressionCodecName;
/*
 * Value/level encoding identifiers; used by PageMetadata_4C (encoding,
 * definition_level_encoding, repetition_level_encoding) and by
 * ColumnChunkMetadata_4C.pEncodings.
 * Numeric values are the enumerators' original implicit positions.
 * NOTE(review): these look intended to line up with the Parquet format's
 * encoding ids -- confirm before adding or reordering entries.
 */
typedef enum Encoding {
    PLAIN = 0,
    GROUP_VAR_INT = 1,
    PLAIN_DICTIONARY = 2,
    RLE = 3,
    BIT_PACKED = 4
} Encoding;
/*
 * Primitive (leaf) type identifiers; used by FileField_4C.type and
 * ColumnChunkMetadata_4C.type.
 * Numeric values are the enumerators' original implicit positions,
 * written out explicitly.
 */
typedef enum PrimitiveTypeName {
    BOOLEAN = 0,
    INT32 = 1,
    INT64 = 2,
    INT96 = 3,
    FLOAT = 4,
    DOUBLE = 5,
    BINARY = 6,
    FIXED_LEN_BYTE_ARRAY = 7
} PrimitiveTypeName;
/*
 * Repetition kind of a schema field; used as the type of
 * FileField_4C.repetitionType.
 */
typedef enum RepetitionType {
    REQUIRED = 0,
    OPTIONAL = 1,
    REPEATED = 2
} RepetitionType;
/*
 * Kind of page described by a page header; used as the type of
 * PageMetadata_4C.page_type.
 */
typedef enum PageType {
    DATA_PAGE = 0,
    INDEX_PAGE = 1,
    DICTIONARY_PAGE = 2
} PageType;
/**
 * Description of one schema field: either a primitive type, or a group
 * type that corresponds to a nested type.
 *
 * - Primitive type: num_children equals 0 and children points to NULL.
 * - Group (internal) type: the `type` member does not describe a primitive.
 *   NOTE(review): the earlier wording said "type equals NULL", but `type`
 *   is an enum and cannot hold NULL -- confirm which value is stored for
 *   group nodes.
 */
typedef struct FileField_4C {
    char *name;                         /* field name */
    int typeLength;                     /* NOTE(review): presumably the length for fixed-length types -- confirm */
    enum PrimitiveTypeName type;        /* primitive type of this field */
    enum RepetitionType repetitionType; /* repetition kind of this field */
    int hawqTypeId;                     /* HAWQ type id */
    int num_children;                   /* number of child fields; 0 for a primitive type */
    struct FileField_4C *children;      /* child fields; NULL for a primitive type */
    int r;                              /* repetition level of this field */
    int d;                              /* definition level of this field */
    int depth;                          /* depth of the path in the schema */
    char *pathInSchema;                 /* path of this field within the schema */
} FileField_4C;
/*
 * Metadata of a single column chunk. BlockMetadata_4C.columns points at
 * objects of this type.
 */
typedef struct ColumnChunkMetadata_4C {
    enum CompressionCodecName codec;    /* compression codec of this chunk */
    char *path;                         /* NOTE(review): presumably the file path holding the chunk -- confirm */
    char *colName;                      /* column name */
    char *pathInSchema;                 /* path of the column within the schema */
    int hawqTypeId;                     /* HAWQ type id */
    int r;                              /* max repetition level of this column */
    int d;                              /* max definition level of this column */
    int depth;                          /* depth of the path in the schema */
    enum PrimitiveTypeName type;        /* primitive type of the column */
    enum Encoding *pEncodings;          /* encodings; NOTE(review): presumably EncodingCount entries -- confirm */
    int EncodingCount;                  /* count paired with pEncodings */

    /* Byte offset in file_path to the ColumnMetaData. */
    int64_t file_offset;
    /* NOTE(review): presumably the byte offset of the first data page -- confirm. */
    int64_t firstDataPage;
    /*
     * Value count. NOTE(review): declared `long` while the neighbouring
     * sizes are int64_t; `long` is only guaranteed to be 32 bits wide.
     */
    long valueCount;

    /* Total byte size of all compressed pages in this column chunk (headers included). */
    int64_t totalSize;
    /* Total byte size of all uncompressed pages in this column chunk (headers included). */
    int64_t totalUncompressedSize;
} ColumnChunkMetadata_4C;
/*
 * Row group metadata.
 */
typedef struct BlockMetadata_4C {
    /* Column chunk metadata; NOTE(review): presumably ColChunkCount entries -- confirm. */
    struct ColumnChunkMetadata_4C *columns;
    int ColChunkCount;  /* count paired with columns */
    /*
     * Row count. NOTE(review): `long` is only guaranteed to be 32 bits
     * wide; confirm this is wide enough on every supported platform.
     */
    long rowCount;
    /* Total byte size of all the uncompressed column data in this row group. */
    long totalByteSize;
} BlockMetadata_4C;
/*
 * File-level Parquet metadata: the schema's first-level fields, row and
 * row-group counts, and the metadata of the current row group.
 */
typedef struct ParquetMetadata_4C {
    /* First level of fields; a field may itself have children. */
    struct FileField_4C *pfield;
    /* Number of first-level fields. */
    int fieldCount;
    /* Number of rows. */
    long num_rows;
    /* Metadata of the current row group. */
    struct BlockMetadata_4C *currentBlockMD;
    /* Number of row groups. */
    int blockCount;
    /* HAWQ schema string; should be written into the key/value part of the parquet metadata. */
    char *hawqschemastr;
    /* Number of columns in each row group, i.e. the expanded columns of pfield. */
    int colCount;
    /* Number of nodes in the schema tree, counting both middle nodes and leaf nodes. */
    int schemaTreeNodeCount;
    /* Array of estimated sizes, one for each column chunk. */
    int *estimateChunkSizes;
    /* Version of the parquet file. */
    int version;
} ParquetMetadata_4C;
/*
 * Page header metadata; the type read by readPageMetadata() and written by
 * writePageMetadata().
 */
typedef struct PageMetadata_4C {
    int32_t num_values;                      /* number of values in the page */
    enum Encoding encoding;                  /* encoding of the page */
    enum Encoding definition_level_encoding; /* encoding of the definition levels */
    enum Encoding repetition_level_encoding; /* encoding of the repetition levels */
    enum PageType page_type;                 /* kind of page */

    /* Uncompressed page size in bytes (this header not included). */
    int32_t uncompressed_page_size;
    /* Compressed page size in bytes (this header not included). */
    int32_t compressed_page_size;
    /* NOTE(review): presumably a CRC checksum of the page -- confirm what it covers. */
    int32_t crc;
} PageMetadata_4C;
/*
 * C-linkage entry points for page and column chunk metadata.
 *
 * The extern "C" wrapper gives these declarations C linkage when the header
 * is included from C++.
 *
 * All three functions return int. The meaning of the return value is not
 * visible in this header.
 * NOTE(review): document the success/failure convention and buffer
 * ownership from the implementation.
 */
#ifdef __cplusplus
extern "C" {
#endif

/*
 * buf           - byte buffer handed to the reader.
 * len           - pointer to a length value.
 *                 NOTE(review): presumably in/out (bytes available on entry,
 *                 bytes consumed on return) -- confirm.
 * compact       - integer flag.
 *                 NOTE(review): presumably selects a compact wire protocol
 *                 -- confirm.
 * ppageMetadata - address of a PageMetadata_4C pointer through which the
 *                 page metadata is exchanged.
 */
int readPageMetadata(uint8_t *buf,
                     uint32_t *len,
                     int compact,
                     struct PageMetadata_4C **ppageMetadata);

/*
 * buf           - address of a byte-buffer pointer.
 *                 NOTE(review): presumably set to the serialized output --
 *                 confirm who allocates and frees it.
 * len           - pointer to a length value paired with *buf.
 * ppageMetadata - page metadata passed to the writer.
 */
int writePageMetadata(uint8_t **buf,
                      uint32_t *len,
                      struct PageMetadata_4C *ppageMetadata);

/*
 * buf                 - address of a byte-buffer pointer.
 *                       NOTE(review): presumably set to the serialized
 *                       output -- confirm who allocates and frees it.
 * len                 - pointer to a length value paired with *buf.
 * columnChunkMetadata - column chunk metadata passed to the writer.
 */
int writeColumnChunkMetadata(uint8_t **buf,
                             uint32_t *len,
                             struct ColumnChunkMetadata_4C *columnChunkMetadata);

#ifdef __cplusplus
}
#endif
#endif /* METADATAINTERFACE_H_ */