blob: c7298d9b481247dd22717836e9d1ff2a838d030b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* cdbparquetam.h
*
* Created on: Jul 4, 2013
* Author: malili
*/
#ifndef CDBPARQUETAM_H_
#define CDBPARQUETAM_H_
#include "access/parquetsegfiles.h"
#include "access/parquetmetadata_c++/MetadataInterface.h"
#include "cdb/cdbappendonlystoragelayer.h"
#include "cdb/cdbparquetstorageread.h"
#include "cdb/cdbparquetstoragewrite.h"
#include "cdb/cdbmirroredappendonly.h"
#include "executor/tuptable.h"
#include "access/appendonlytid.h"
#include "commands/copy.h"
#include "cdb/cdbparquetcolumn.h"
#include "cdb/cdbparquetrowgroup.h"
#include "nodes/relation.h"
/*
 * ParquetScanDescData
 *
 * State kept for one scan of a parquet relation. ParquetScanDesc, declared
 * right after the struct, is the pointer form taken and returned by the
 * parquet_beginscan / parquet_rescan / parquet_getnext / parquet_endscan
 * prototypes in this header.
 *
 * NOTE(review): only the declaration is visible here; notes marked
 * "presumably" are inferred from names and must be confirmed against the
 * implementation.
 */
typedef struct ParquetScanDescData {
/* scan parameters */
Relation pqs_rd; /* target relation descriptor */
TupleDesc pqs_tupDesc; /* tuple descriptor for the scan; presumably the relationTupleDesc given to parquet_beginscan -- TODO confirm */
/* file segment scan state */
int pqs_filenamepath_maxlen; /* presumably the capacity of pqs_filenamepath -- TODO confirm */
char *pqs_filenamepath; /* pathname of the current segment file */
int pqs_splits_processed; /* count of splits already processed (the original note called these segfiles) */
bool pqs_need_new_split; /* presumably set when the scan must move on to another split -- TODO confirm */
bool pqs_done_all_splits; /* presumably set once no splits remain -- TODO confirm */
/* column projection: which columns need to be scanned */
bool *proj;
/* synthetic system attributes */
ItemPointerData cdb_fake_ctid;
int64 cur_seg_row;
/*
 * Mapping from hawq attributes to parquet column chunks: for each hawq
 * column there is a group of ints naming its parquet column chunks.
 * NOTE(review): the field is a single int pointer, so how the per-column
 * groups are laid out inside it is not visible from this header.
 */
int *hawqAttrToParquetColChunks;
MemoryContext parquetScanInitContext; /* memory context at init time */
ParquetRowGroupReader rowGroupReader;
/* current scan state */
bool bufferDone;
bool initedStorageRoutines;
AppendOnlyStorageAttributes storageAttributes;
ParquetStorageRead storageRead;
QueryContextDispatchingSendBack sendback;
AppendOnlyEntry *aoEntry;
List *splits;
bool toCloseFile; /* tells whether the segment file is ready to be closed */
RuntimeFilterState *rfState; /* Bloom filter */
} ParquetScanDescData;
/* Pointer form of the scan state; this is what the scan API passes around. */
typedef ParquetScanDescData *ParquetScanDesc;
/*
 * ParquetInsertDescData
 *
 * State kept while inserting data into a writable parquet table.
 * ParquetInsertDesc, declared right after the struct, is the pointer form
 * taken and returned by the parquet_insert_* prototypes in this header.
 *
 * NOTE(review): only the declaration is visible here; notes marked
 * "presumably" are inferred from names and must be confirmed against the
 * implementation.
 */
typedef struct ParquetInsertDescData {
MemoryContext memoryContext;
Relation parquet_rel; /* presumably the relation being inserted into -- TODO confirm */
Snapshot parquetMetaDataSnapshot;
File parquet_file; /* file handle */
File file_previousmetadata; /* read file descriptor for the previous metadata */
CompactProtocol *protocol_read; /* footer protocol for reading the previous metadata */
int parquetFilePathNameMaxLen; /* presumably the capacity of parquetFilePathName -- TODO confirm */
char *parquetFilePathName; /* file pathname, stored as an hdfs path */
char *relname;
int cur_segno;
AppendOnlyEntry *aoEntry;
ParquetFileSegInfo *fsInfo;
int64 rowCount;
int64 insertCount; /* number of records inserted */
int previous_rowgroupcnt; /* the original row group count */
int64 fileLen_uncompressed; /* uncompressed length of the data file */
int64 fileLen; /* file length of the data file */
char *title;
ParquetMetadata parquetMetadata;
ParquetRowGroup current_rowGroup;
CompactProtocol *footerProtocol; /* footer protocol for processing */
MirroredAppendOnlyOpen *mirroredOpen; /* used for opening the segment file */
QueryContextDispatchingSendBack sendback;
} ParquetInsertDescData;
/* Pointer form of the insert state; this is what the insert API passes around. */
typedef ParquetInsertDescData *ParquetInsertDesc;
/* ----------------
* function prototypes for parquet access method
* ----------------
*/
/*
 * parquet_beginscan
 *
 * Takes the relation, a snapshot, the relation's tuple descriptor and a
 * bool array `proj`; returns a ParquetScanDesc. Defined outside this header.
 *
 * NOTE(review): presumably `proj` carries one flag per attribute of
 * relationTupleDesc (true = scan that column) and the returned descriptor
 * must later be handed to parquet_endscan -- confirm against the definition.
 */
extern ParquetScanDesc parquet_beginscan(Relation relation,
                                         Snapshot parquetMetaDataSnapshot,
                                         TupleDesc relationTupleDesc,
                                         bool *proj);
/*
 * parquet_rescan
 *
 * Operates on an existing scan descriptor and returns nothing.
 * NOTE(review): presumably restarts the scan from its beginning -- confirm
 * against the definition, which is not in this header.
 */
extern void parquet_rescan(ParquetScanDesc scan);
/*
 * parquet_endscan
 *
 * Operates on an existing scan descriptor and returns nothing.
 * NOTE(review): presumably finishes the scan and releases what
 * parquet_beginscan set up -- confirm against the definition, which is not
 * in this header.
 */
extern void parquet_endscan(ParquetScanDesc scan);
/*
 * parquet_getnext
 *
 * Takes the scan descriptor, a scan direction and a tuple table slot.
 * The return type is void, so any result must travel through `slot`.
 *
 * NOTE(review): presumably stores the next tuple in `slot`; how the end of
 * the scan is reported is not visible from this header -- confirm against
 * the definition.
 */
extern void parquet_getnext(ParquetScanDesc scan, ScanDirection direction,
                            TupleTableSlot *slot);
/*
 * parquet_insert_init
 *
 * Takes the target relation and a ResultRelSegFileInfo; returns a
 * ParquetInsertDesc. Defined outside this header.
 *
 * NOTE(review): presumably the returned descriptor is the one later passed
 * to parquet_insert / parquet_insert_values / parquet_insert_finish --
 * confirm against the definition.
 */
extern ParquetInsertDesc parquet_insert_init(Relation rel,
                                             ResultRelSegFileInfo *segfileinfo);
/*
 * parquet_insert
 *
 * Takes an insert descriptor and a tuple table slot; returns an Oid.
 *
 * NOTE(review): presumably inserts the tuple held in `slot`; what the
 * returned Oid identifies is not visible from this header -- confirm
 * against the definition.
 */
extern Oid parquet_insert(ParquetInsertDesc parquetInsertDesc,
                          TupleTableSlot *slot);
/*
 * parquet_insert_values
 *
 * Takes an insert descriptor, a Datum array, a bool array and an AOTupleId
 * pointer; returns an Oid.
 *
 * NOTE(review): presumably `values` and `nulls` are parallel per-attribute
 * arrays and `aoTupleId` is an output -- none of that is visible from this
 * header; confirm against the definition.
 */
extern Oid parquet_insert_values(ParquetInsertDesc parquetInsertDesc,
                                 Datum *values, bool *nulls,
                                 AOTupleId *aoTupleId);
/*
 * parquet_insert_finish
 *
 * Operates on an existing insert descriptor and returns nothing.
 * NOTE(review): presumably completes the insert and releases what
 * parquet_insert_init set up -- confirm against the definition, which is
 * not in this header.
 */
extern void parquet_insert_finish(ParquetInsertDesc parquetInsertDesc);
/*
 * memReservedForParquetInsert
 *
 * Takes a relation Oid and returns a uint64.
 * NOTE(review): by its name this is the amount of memory to reserve for an
 * insert into that relation; the unit is not visible from this header --
 * confirm against the definition.
 */
extern uint64 memReservedForParquetInsert(Oid rel_oid);
/*
 * memReservedForParquetScan
 *
 * Takes a relation Oid and a List of attributes; returns a uint64.
 * NOTE(review): by its name this is the amount of memory to reserve for a
 * scan of that relation; the unit and the element type of `attr_list` are
 * not visible from this header -- confirm against the definition.
 */
extern uint64 memReservedForParquetScan(Oid rel_oid, List *attr_list);
#endif /* CDBPARQUETAM_H_ */