blob: 521d444d377d1b8cae3bb9331566e385c8c4716f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* cdbparquetcolum.h
*
* Created on: Sep 29, 2013
* Author: malili
*/
#ifndef CDBPARQUETCOLUM_H_
#define CDBPARQUETCOLUM_H_
#include "cdb/cdbparquetstoragewrite.h"
#define DAFAULT_DATAPAGE_NUM_PER_COLUMNCHUNK 10
/*
* we call it CurrentDefinitionLevel because definition level is used to
* determine whether the current value is null, the MACRO is used before
* we consume() the current r/d/value triplet.
*
* we call it NextRepetitionLevel because we use next triplet's repetition
* level to decide next value is repeated on which level, the MACRO is used
* after the current triplet has been consume().
*
* `repetitionLevel` is guaranteed to be 0 after we consumed the last value.
*/
#define CurrentDefinitionLevel(columnReader) (columnReader)->definitionLevel
#define NextRepetitionLevel(columnReader) (columnReader)->repetitionLevel
typedef struct ParquetColumnReader
{
MemoryContext memoryContext;
struct ColumnChunkMetadata_4C *columnMetadata;
ParquetDataPage dataPages;
ParquetDataPage currentPage;
int currentPageValueRemained;/*the number of values remained for read*/
int dataPageNum;
int dataPageCapacity;
int dataPageProcessed;/*the number of data pages have already been processed*/
int repetitionLevel;
int definitionLevel;
/*
* dataBuffer stores column chunk's raw data read from file.
* This buffer is reused accross multiple row group.
*/
char *dataBuffer;
int32 dataLen;
/*
* For compressed non-repeatable (r == 0) column, we reuse a shared buffer
* `pageBuffer` to store decompressed content for each page to save memory.
*
* For compressed repeatable (r > 0) column, we must keep each page's
* decompressed content until FinishScanColumn since column values of
* one record may span multiple pages.
*
* For uncompressed column, all data can be read from page->data, which
* just points to some location at `dataBuffer` above.
*/
char *pageBuffer;
int32 pageBufferLen;
/*buffer reused for embedded type, avoid palloc each time for each tuple*/
void *geoval;
} ParquetColumnReader;
extern void ParquetExecutorReadColumn(
ParquetColumnReader *columnReaders,
File file);
extern void ParquetColumnReader_readValue(ParquetColumnReader *columnReader,
Datum *value, bool *null, int hawqTypeID);
extern void ParquetColumnReader_readPoint(ParquetColumnReader readers[], Datum *value, bool *null);
extern void ParquetColumnReader_readLSEG(ParquetColumnReader readers[], Datum *value, bool *null);
extern void ParquetColumnReader_readPATH(ParquetColumnReader readers[], Datum *value, bool *null);
extern void ParquetColumnReader_readBOX(ParquetColumnReader readers[], Datum *value, bool *null);
extern void ParquetColumnReader_readPOLYGON(ParquetColumnReader readers[], Datum *value, bool *null);
extern void ParquetColumnReader_readCIRCLE(ParquetColumnReader readers[], Datum *value, bool *null);
extern void ParquetColumnReader_FinishedScanColumn(ParquetColumnReader *columnReader);
#endif /* CDBPARQUETCOLUM_H_ */