/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* cdbparquetfooterserializer.c
*
* Created on: Mar 17, 2014
* Author: malili
*/
#include "cdb/cdbparquetfooterserializer.h"
#include "cdb/cdbparquetstoragewrite.h"
#include "access/parquetmetadata_c++/MetadataInterface.h"
#include "lib/stringinfo.h"
#include "postgres.h"
/** Deserialization (read) functions */
static int
readParquetFileMetadata(
ParquetMetadata *parquetMetadata,
CompactProtocol *prot);
static int
readSchemaElement(
CompactProtocol *prot,
uint32_t schemaListSize,
FileField_4C** pfields,
int *fieldCount,
int *colCount,
int *schemaTreeNodeCount);
static int
readParquetSchema_SingleField(
FileField_4C *pfield,
CompactProtocol *prot,
int *schemaIndex,
int r,
int d,
int depth,
int *colCount,
int *schemaTreeNodeCount,
char *parentPathInSchema);
static int
readSchemaElement_Single(
CompactProtocol *prot,
PrimitiveTypeName *fieldType,
int32_t *type_length,
RepetitionType *repetition_type,
char **fieldName,
int32_t *num_children);
static int
readRowGroupInfo(
CompactProtocol *prot,
struct BlockMetadata_4C* rowGroupInfo,
struct FileField_4C* pfields,
int pfieldCount);
static int
readColumnChunk(
CompactProtocol *prot,
struct ColumnChunkMetadata_4C *colChunk);
static int
readColumnMetadata(
CompactProtocol *prot,
struct ColumnChunkMetadata_4C *colChunk);
static void
assignRDFromFieldToColumnChunk(
struct ColumnChunkMetadata_4C* columns,
int *columnIndex,
struct FileField_4C* pfields,
int fieldLevelCount);
static uint32_t
readKeyValue(
CompactProtocol *prot,
char **key,
char **value);
static void
freeField(struct FileField_4C *field);
/** Serialization (write) functions */
static int
writeColumnMetadata(
struct ColumnChunkMetadata_4C *columnInfo,
CompactProtocol *prot);
static int
writeColumnChunk(
struct ColumnChunkMetadata_4C *columnInfo,
CompactProtocol *prot);
static int
writeSchemaElement_Single(
CompactProtocol *prot,
PrimitiveTypeName *fieldType,
int32_t type_length,
RepetitionType *repetition_type,
char *fieldName,
int32_t num_children);
static int
writeSchemaElement_SingleField(
CompactProtocol *prot,
FileField_4C * field,
int *colIndex);
static int
writeSchemaElement(
FileField_4C *pfields,
int fieldCount,
int nodeCount,
CompactProtocol *prot);
static int
writePreviousRowGroupMetadata(
int rowgroupcnt,
ParquetMetadata parquetMetadata,
CompactProtocol *read_prot,
CompactProtocol *write_prot);
static int
writePreviousParquetFileMetadata(
ParquetMetadata parquetMetadata,
char *fileName,
File file,
int rowgroupCnt,
CompactProtocol **read_prot);
static int
writeEndofParquetMetadata(
ParquetMetadata parquetMetadata,
CompactProtocol *prot);
/*
* Initialize deserializing the footer from file; row group metadata is read one row group at a time
*/
void initDeserializeFooter(
File file,
int64 footerLength,
char *fileName,
ParquetMetadata *parquetMetadata,
CompactProtocol **footerProtocol)
{
/*Initialize the footer protocol*/
*footerProtocol =
(struct CompactProtocol *)palloc0(sizeof(struct CompactProtocol));
initCompactProtocol(*footerProtocol, file, fileName, footerLength, PARQUET_FOOTER_BUFFERMODE_READ);
readParquetFileMetadata(parquetMetadata, *footerProtocol);
}
/*
* The initial read method: read the file metadata, but only its first 4 parts,
* namely version, schema information, number of rows, and row group count. The
* per-rowgroup metadata and the keyvalue part are not read here; the metadata of
* the next rowgroup is read just before its actual data is read.
*
* @parquetMetadata parquet metadata information
* @prot footer protocol for reading
*/
int
readParquetFileMetadata(
ParquetMetadata *parquetMetadata,
CompactProtocol *prot)
{
uint32_t xfer = 0;
TType ftype;
int16_t fid;
bool isset_version = false;
bool isset_schema = false;
bool isset_num_rows = false;
bool isset_row_groups = false;
while (true) {
xfer += readFieldBegin(prot, &ftype, &fid);
if (ftype == T_STOP) {
break;
}
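/* Field ids follow the Thrift FileMetaData struct in parquet.thrift:
* 1 = version, 2 = schema, 3 = num_rows, 4 = row_groups,
* 5 = key_value_metadata (read later in endDeserializerFooter), 6 = created_by */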
switch (fid) {
case 1:
/* Process version*/
if (ftype == T_I32) {
xfer += readI32(prot, &((*parquetMetadata)->version));
isset_version = true;
}
break;
case 2:
/* process schema - field information*/
if (ftype == T_LIST) {
{
uint32_t lsize;
TType ltype;
xfer += readListBegin(prot, &ltype, &lsize);
readSchemaElement(prot, lsize,
&((*parquetMetadata)->pfield),
&((*parquetMetadata)->fieldCount),
&((*parquetMetadata)->colCount),
&((*parquetMetadata)->schemaTreeNodeCount));
}
isset_schema = true;
}
break;
case 3:
/* process number of rows*/
if (ftype == T_I64) {
int64_t num_rows = 0;
xfer += readI64(prot, &num_rows);
(*parquetMetadata)->num_rows = num_rows;
isset_num_rows = true;
}
break;
case 4:
/* process row group information*/
if (ftype == T_LIST) {
/* get row group count*/
uint32_t lSize;
TType etype;
xfer += readListBegin(prot, &etype, &lSize);
(*parquetMetadata)->blockCount = lSize;
isset_row_groups = true;
break;
}
break;
case 5:
/* Skip this optional field now */
if (ftype == T_LIST) {
xfer += skipType(prot, ftype);
}
break;
case 6:
/* Skip this optional field now */
if (ftype == T_STRING) {
xfer += skipType(prot, ftype);
}
break;
default:
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata field not recognized with fid: %d", fid)));
break;
}
/*hit row groups, break out of the while loop*/
if(isset_row_groups)
break;
}
if (!isset_version)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata version not set")));
if (!isset_schema)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata schema not set")));
if (!isset_num_rows)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata num_rows not set")));
if (!isset_row_groups)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR),
errmsg("file metadata row group information not set")));
return xfer;
}
/**
* Read the schema elements of the parquet file and convert them to the
* hawq-specific format (FileField_4C). On the HAWQ side, each field has a
* children member that links its child fields together. On the Parquet file
* side, the children are stored as a flattened depth-first traversal of the
* schema tree. For example, for a type A{A.a, A.b}, the Parquet file stores
* three consecutive fields: A, A.a, A.b, whereas HAWQ stores a single element
* A whose field has two children: A.a and A.b.
* Moreover, the Parquet schema list begins with a root element that does not
* correspond to any HAWQ type (it is a parquet-specific format), so the first
* element is read and then ignored during the conversion.
*
* @prot The protocol reader
* @schemaListSize The number of schema elements in the parquet file list
* @pfields Output array of field information, converted from the schema elements
* @fieldCount The number of top-level fields, i.e. the number of columns in the parquet table
* @colCount The number of leaf columns in each row group, i.e. pfields expanded to leaf level
* @schemaTreeNodeCount The count of all nodes in the schema tree, both interior and leaf nodes
*/
int
readSchemaElement(
CompactProtocol *prot,
uint32_t schemaListSize,
FileField_4C** pfields,
int *fieldCount,
int *colCount,
int *schemaTreeNodeCount)
{
int fieldIndex; /*Record the index in HAWQ side: output*/
int schemaIndex; /*Record the index in Parquet file side: input*/
int maxFieldCount = schemaListSize - 1; /*The max size of output array*/
*pfields =
(struct FileField_4C*) palloc0(maxFieldCount * sizeof(struct FileField_4C));
/*for the root, r and d both equal 0*/
int r = 0;
int d = 0;
int depth = 1;
/*read out the first schema element ('schema', the root) and ignore it*/
PrimitiveTypeName fieldType;
int32_t type_length;
RepetitionType repetition_type;
char *fieldName = NULL;
int32_t num_children;
readSchemaElement_Single(prot, &fieldType, &type_length, &repetition_type,
&fieldName, &num_children);
/* Traverse the input, which is a depth-first flattening of the schema tree,
* and convert it into the result array "pfields".*/
schemaIndex = 1;
for (fieldIndex = 0; fieldIndex < maxFieldCount; ++fieldIndex) {
/* schemaIndex is advanced inside the function below for nested types*/
readParquetSchema_SingleField(&((*pfields)[fieldIndex]), prot,
&schemaIndex, r, d, depth, colCount, schemaTreeNodeCount,
NULL);
/* When all input has been processed, break out of the loop*/
if ((++schemaIndex) == schemaListSize)
break;
}
*fieldCount = fieldIndex + 1;
return 0;
}
/**
* Read a parquet schema element and generate the corresponding hawq field information
* @pfield: the hawq field to fill in
* @prot: the protocol for reading
* @schemaIndex: index of the parquet schema element currently being processed
* @r: The repetition level of the parent
* @d: The definition level of the parent
* @depth The depth of the field in the schema hierarchy tree
* @colCount The running count of leaf columns
* @schemaTreeNodeCount The total number of nodes in the schema hierarchy tree, not including the root
* @parentPathInSchema The path in schema of the parent node
*/
int
readParquetSchema_SingleField(
FileField_4C *pfield,
CompactProtocol *prot,
int *schemaIndex,
int r,
int d,
int depth,
int *colCount,
int *schemaTreeNodeCount,
char *parentPathInSchema)
{
uint32_t xfer = 0;
/**
* Step 1: read the schema element from the underlying protocol
*/
PrimitiveTypeName fieldType = 0;
int32_t type_length = 0;
RepetitionType repetition_type = 0;
char *fieldName = NULL;
int32_t num_children = 0;
xfer += readSchemaElement_Single(prot, &fieldType, &type_length, &repetition_type,
&fieldName, &num_children);
/**
* Step 2: convert the schema element into a hawq field (FileField_4C)
*/
/* assign name*/
int nameLen = strlen(fieldName);
int pathInSchemaLen;
pfield->name = fieldName;
/* initialize path in schema*/
if (parentPathInSchema == NULL) {
pathInSchemaLen = nameLen;
pfield->pathInSchema = (char*) palloc0(pathInSchemaLen + 1);
strcpy(pfield->pathInSchema, pfield->name);
} else {
pathInSchemaLen = strlen(parentPathInSchema) + nameLen + 1;
pfield->pathInSchema = (char*) palloc0(pathInSchemaLen + 1);
strcpy(pfield->pathInSchema, parentPathInSchema);
strcat(pfield->pathInSchema, ":");
strcat(pfield->pathInSchema, pfield->name);
}
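/* e.g. a child field "a" under parent "A" ends up with pathInSchema "A:a" */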
pfield->repetitionType = repetition_type;
/*if the repetition type is 'repeated', r increases*/
pfield->r = (pfield->repetitionType == REPEATED) ? r + 1 : r;
/*if the repetition type is 'optional' or 'repeated', d increases*/
pfield->d = (pfield->repetitionType == REQUIRED) ? d : d + 1;
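/* Example: with the root at r = 0 and d = 0, a REQUIRED top-level field keeps
* r = 0, d = 0; an OPTIONAL field under the root gets r = 0, d = 1; and a
* REPEATED child of that field gets r = 1, d = 2. */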
pfield->depth = depth;
(*schemaTreeNodeCount)++;
pfield->num_children = num_children;
if (pfield->num_children > 0)
{
pfield->children =
(struct FileField_4C*) palloc0(pfield->num_children * sizeof(struct FileField_4C));
for (int i = 0; i < pfield->num_children; i++)
{
*schemaIndex = *schemaIndex + 1;
/*children immediately follow their parent in the flattened schema list*/
xfer += readParquetSchema_SingleField(&(pfield->children[i]), prot,
schemaIndex, pfield->r, pfield->d, pfield->depth + 1,
colCount, schemaTreeNodeCount, pfield->pathInSchema);
}
}
else
{
/*only primitive (leaf) types have a type and a type length */
pfield->type = fieldType;
pfield->typeLength = type_length;
(*colCount)++;
}
return xfer;
}
/**
* Read a single schema element from the parquet file
* @prot: The protocol for reading (input)
* @fieldType: The type of the field
* @type_length: The length of the field type, in bytes
* @repetition_type: The repetition type of the field
* @fieldName The name of the field
* @num_children The number of children of the field
*/
int
readSchemaElement_Single(
CompactProtocol *prot,
PrimitiveTypeName *fieldType,
int32_t *type_length,
RepetitionType *repetition_type,
char **fieldName,
int32_t *num_children)
{
uint32_t xfer = 0;
TType ftype;
int16_t fid;
bool isset_name = false;
readStructBegin(prot);
while (true) {
xfer += readFieldBegin(prot, &ftype, &fid);
if (ftype == T_STOP) {
break;
}
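/* Field ids follow the Thrift SchemaElement struct in parquet.thrift:
* 1 = type, 2 = type_length, 3 = repetition_type, 4 = name, 5 = num_children,
* 6 = converted_type, 7 = scale, 8 = precision, 9 = field_id (6-9 are skipped) */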
switch (fid) {
case 1:
if (ftype == T_I32) {
int32_t val;
xfer += readI32(prot, &val);
*fieldType = (PrimitiveTypeName) val;
}
break;
case 2:
if (ftype == T_I32) {
int32_t bit_length = 0;
xfer += readI32(prot, &bit_length);
*type_length = bit_length / 8;
}
break;
case 3:
if (ftype == T_I32) {
int32_t ecast1;
xfer += readI32(prot, &ecast1);
*repetition_type = (RepetitionType) ecast1;
}
break;
case 4:
if (ftype == T_STRING) {
isset_name = true;
xfer += readString(prot, fieldName);
}
break;
case 5:
if (ftype == T_I32) {
xfer += readI32(prot, num_children);
}
break;
case 6:
if (ftype == T_I32) {
xfer += skipType(prot, ftype);
}
break;
case 7:
/* Skip this optional field now */
if (ftype == T_I32) {
xfer += skipType(prot, ftype);
}
break;
case 8:
/* Skip this optional field now */
if (ftype == T_I32) {
xfer += skipType(prot, ftype);
}
break;
case 9:
/* Skip this optional field now */
if (ftype == T_I32) {
xfer += skipType(prot, ftype);
}
break;
default:
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: schema element field not recognized with fid: %d", fid)));
break;
}
}
readStructEnd(prot);
if (!isset_name)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg(
"file metadata schema element information not correct")));
return xfer;
}
/**
* Read parquet file rowgroup information, and convert it to hawq structure
*
* @prot the reading protocol
* @rowGroupInfo the row group information
* @pfields the schema information of the file
* @pfieldCount the field count of schema
*/
int
readRowGroupInfo(
CompactProtocol *prot,
struct BlockMetadata_4C* rowGroupInfo,
struct FileField_4C* pfields,
int pfieldCount)
{
uint32_t xfer = 0;
TType ftype;
int16_t fid;
readStructBegin(prot);
bool isset_columns = false;
bool isset_total_byte_size = false;
bool isset_num_rows = false;
while (true) {
xfer += readFieldBegin(prot, &ftype, &fid);
if (ftype == T_STOP) {
break;
}
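/* Field ids follow the Thrift RowGroup struct in parquet.thrift:
* 1 = columns, 2 = total_byte_size, 3 = num_rows, 4 = sorting_columns (skipped) */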
switch (fid) {
case 1:
if (ftype == T_LIST)
{
uint32_t colChunkCnt;
TType etype;
xfer += readListBegin(prot, &etype, &colChunkCnt);
rowGroupInfo->ColChunkCount = colChunkCnt;
rowGroupInfo->columns =
(struct ColumnChunkMetadata_4C*) palloc0(
colChunkCnt * sizeof(struct ColumnChunkMetadata_4C));
for (int i = 0; i < colChunkCnt; i++) {
xfer += readColumnChunk(prot, &(rowGroupInfo->columns[i]));
}
isset_columns = true;
}
break;
case 2:
if (ftype == T_I64) {
int64_t val;
xfer += readI64(prot, &val);
rowGroupInfo->totalByteSize = val;
isset_total_byte_size = true;
}
break;
case 3:
if (ftype == T_I64) {
int64_t val;
xfer += readI64(prot, &val);
rowGroupInfo->rowCount = val;
isset_num_rows = true;
}
break;
case 4:
/* Skip this optional field now */
if (ftype == T_LIST) {
xfer += skipType(prot, ftype);
}
break;
default:
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: row group field not recognized with fid: %d", fid)));
break;
}
}
readStructEnd(prot);
if (!isset_columns)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata row group column chunk not set")));
if (!isset_total_byte_size)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata row group total byte size not set")));
if (!isset_num_rows)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata row group row count not set")));
/*assign r and d of fields to column chunks*/
int columnIndex = 0;
assignRDFromFieldToColumnChunk(rowGroupInfo->columns, &columnIndex, pfields,
pfieldCount);
return xfer;
}
/**
* read column chunk information
*/
int
readColumnChunk(
CompactProtocol *prot,
struct ColumnChunkMetadata_4C *colChunk)
{
uint32_t xfer = 0;
TType ftype;
int16_t fid;
readStructBegin(prot);
bool isset_file_offset = false;
while (true) {
xfer += readFieldBegin(prot, &ftype, &fid);
if (ftype == T_STOP) {
break;
}
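/* Field ids follow the Thrift ColumnChunk struct in parquet.thrift:
* 1 = file_path, 2 = file_offset, 3 = meta_data */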
switch (fid) {
case 1:
if (ftype == T_STRING) {
char *file_path;
xfer += readString(prot, &file_path);
colChunk->path = file_path;
}
break;
case 2:
if (ftype == T_I64) {
xfer += readI64(prot, &(colChunk->file_offset));
isset_file_offset = true;
}
break;
case 3:
if (ftype == T_STRUCT) {
/*read column metadata*/
xfer += readColumnMetadata(prot, colChunk);
}
break;
default:
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: column chunk field not recognized with fid: %d", fid)));
break;
}
}
readStructEnd(prot);
if (!isset_file_offset)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: row group column chunk fileoffset not set")));
return xfer;
}
/**
* read column chunk metadata information
*/
int
readColumnMetadata(
CompactProtocol *prot,
struct ColumnChunkMetadata_4C *colChunk)
{
uint32_t xfer = 0;
TType ftype;
int16_t fid;
readStructBegin(prot);
bool isset_type = false;
bool isset_encodings = false;
bool isset_path_in_schema = false;
bool isset_codec = false;
bool isset_num_values = false;
bool isset_total_uncompressed_size = false;
bool isset_total_compressed_size = false;
bool isset_data_page_offset = false;
while (true) {
xfer += readFieldBegin(prot, &ftype, &fid);
if (ftype == T_STOP) {
break;
}
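/* Field ids follow the Thrift ColumnMetaData struct in parquet.thrift:
* 1 = type, 2 = encodings, 3 = path_in_schema, 4 = codec, 5 = num_values,
* 6 = total_uncompressed_size, 7 = total_compressed_size, 8 = key_value_metadata,
* 9 = data_page_offset, 10 = index_page_offset, 11 = dictionary_page_offset,
* 12 = statistics, 13 = encoding_stats (8 and 10-13 are skipped) */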
switch (fid) {
case 1:
if (ftype == T_I32) {
int32_t type;
xfer += readI32(prot, &type);
colChunk->type = (PrimitiveTypeName) type;
isset_type = true;
}
break;
case 2:
if (ftype == T_LIST) {
uint32_t encodingCount;
TType etype;
xfer += readListBegin(prot, &etype, &encodingCount);
colChunk->EncodingCount = encodingCount;
colChunk->pEncodings =
(enum Encoding *) palloc0(sizeof(enum Encoding) * encodingCount);
for (int i = 0; i < encodingCount; i++) {
int32_t encoding;
xfer += readI32(prot, &encoding);
colChunk->pEncodings[i] = (enum Encoding) encoding;
}
isset_encodings = true;
}
break;
case 3:
if (ftype == T_LIST) {
{
/*process path in schema, setting colchunk->depth and colchunk->pathInSchema*/
TType etype;
uint32_t lsize;
StringInfoData colNameBuf;
xfer += readListBegin(prot, &etype, &lsize);
colChunk->depth = lsize;
initStringInfo(&colNameBuf);
char *path_in_schema;
for (int i = 0; i < lsize - 1; i++) {
xfer += readString(prot, &path_in_schema);
appendStringInfo(&colNameBuf, "%s:", path_in_schema);
pfree(path_in_schema);
}
xfer += readString(prot, &path_in_schema);
appendStringInfo(&colNameBuf, "%s", path_in_schema);
colChunk->pathInSchema = colNameBuf.data;
colChunk->colName = path_in_schema;
}
isset_path_in_schema = true;
}
break;
case 4:
if (ftype == T_I32) {
int32_t compresscode;
xfer += readI32(prot, &compresscode);
colChunk->codec = (enum CompressionCodecName) compresscode;
isset_codec = true;
}
break;
case 5:
if (ftype == T_I64) {
int64_t valCnt;
xfer += readI64(prot, &valCnt);
colChunk->valueCount = valCnt;
isset_num_values = true;
}
break;
case 6:
if (ftype == T_I64) {
xfer += readI64(prot, &(colChunk->totalUncompressedSize));
isset_total_uncompressed_size = true;
}
break;
case 7:
if (ftype == T_I64) {
xfer += readI64(prot, &(colChunk->totalSize));
isset_total_compressed_size = true;
}
break;
case 8:
if (ftype == T_LIST) {
xfer += skipType(prot, ftype);
}
break;
case 9:
if (ftype == T_I64) {
xfer += readI64(prot, &(colChunk->firstDataPage));
isset_data_page_offset = true;
}
break;
case 10:
if (ftype == T_I64) {
xfer += skipType(prot, ftype);
}
break;
case 11:
if (ftype == T_I64) {
xfer += skipType(prot, ftype);
}
break;
case 12:
/* Skip this optional field now */
if (ftype == T_STRUCT) {
xfer += skipType(prot, ftype);
}
break;
case 13:
/* Skip this optional field now */
if (ftype == T_LIST) {
xfer += skipType(prot, ftype);
}
break;
default:
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: column metadata field not recognized with fid: %d", fid)));
break;
}
}
readStructEnd(prot);
if (!isset_type)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: row group column chunk type not set")));
if (!isset_encodings)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: row group column chunk encoding not set")));
if (!isset_path_in_schema)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: row group column chunk path_in_schema not set")));
if (!isset_codec)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: row group column chunk compression code not set")));
if (!isset_num_values)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: row group column chunk value number not set")));
if (!isset_total_uncompressed_size)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: row group column chunk total uncompressed size not set")));
if (!isset_total_compressed_size)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: row group column chunk total compressed size not set")));
if (!isset_data_page_offset)
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: row group column chunk first data page not set")));
return xfer;
}
/**
* Assign the r and d values from the schema fields (pfields) to the column chunks,
* walking the schema tree depth-first so that leaf fields line up with the column chunk order
*/
void
assignRDFromFieldToColumnChunk(
struct ColumnChunkMetadata_4C* columns,
int *columnIndex,
struct FileField_4C* pfields,
int fieldLevelCount)
{
for (int i = 0; i < fieldLevelCount; i++)
{
if (pfields[i].num_children == 0)
{
columns[*columnIndex].r = pfields[i].r;
columns[*columnIndex].d = pfields[i].d;
(*columnIndex)++;
}
else
{
assignRDFromFieldToColumnChunk(columns, columnIndex,
pfields[i].children, pfields[i].num_children);
}
}
}
/**
* Read a single key/value pair
*/
uint32_t readKeyValue(
CompactProtocol *prot,
char **key,
char **value)
{
uint32_t xfer = 0;
TType ftype;
int16_t fid;
bool isset_key = false;
readStructBegin(prot);
while (true) {
xfer += readFieldBegin(prot, &ftype, &fid);
if (ftype == T_STOP)
break;
switch (fid) {
case 1:
if (ftype == T_STRING) {
xfer += readString(prot, key);
isset_key = true;
}
break;
case 2:
if (ftype == T_STRING) {
xfer += readString(prot, value);
}
break;
default:
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata: key value field not recognized with fid: %d", fid)));
break;
}
}
readStructEnd(prot);
if (!isset_key)
ereport(ERROR, (errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("file metadata key value: key not set")));
return xfer;
}
/*
* Read the metadata of the next row group, first freeing the metadata of the
* current row group
*/
void
readNextRowGroupInfo(
ParquetMetadata parquetMetadata,
CompactProtocol *prot)
{
/*free current rowgroup info and read next rowgroup metadata*/
if(parquetMetadata->currentBlockMD != NULL){
freeRowGroupInfo(parquetMetadata->currentBlockMD);
parquetMetadata->currentBlockMD = NULL;
}
/*read next row group metadata out*/
parquetMetadata->currentBlockMD =
(struct BlockMetadata_4C *) palloc0(sizeof(struct BlockMetadata_4C));
readRowGroupInfo(prot, parquetMetadata->currentBlockMD,
parquetMetadata->pfield, parquetMetadata->fieldCount);
}
/**
* Finish deserializing the footer: read the key/value part out if it exists, then read the stop field
*/
void
endDeserializerFooter(
ParquetMetadata parquetMetadata,
CompactProtocol **prot)
{
TType ftype;
int16_t fid;
int xfer = 0;
while (true) {
xfer += readFieldBegin(*prot, &ftype, &fid);
if (ftype == T_STOP) {
break;
}
switch (fid) {
case 5:
if (ftype == T_LIST) {
uint32_t lsize;
TType etype;
xfer += readListBegin(*prot, &etype, &lsize);
for (int i = 0; i < lsize; i++) {
char *key = NULL;
char *value = NULL;
xfer += readKeyValue(*prot, &key, &value);
if ((key != NULL) && (strcmp(key, "hawq.schema") == 0)) {
int schemaLen = strlen(value);
parquetMetadata->hawqschemastr =
(char*) palloc0(schemaLen + 1);
strcpy(parquetMetadata->hawqschemastr, value);
}
}
}
break;
case 6:
/* Skip the optional created_by string */
if (ftype == T_STRING) {
xfer += skipType(*prot, ftype);
}
break;
default:
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR), errmsg("incorrect file metadata format with fid: %d", fid)));
break;
}
}
freeFooterProtocol(*prot);
}
void freeFooterProtocol(CompactProtocol *protocol) {
if (protocol) {
freeCompactProtocol(protocol);
pfree(protocol);
}
}
/**
* Write part functions
*/
int
writeColumnMetadata(
struct ColumnChunkMetadata_4C *columnInfo,
CompactProtocol *prot)
{
uint32_t xfer = 0;
char *elemPath = NULL;
const char *delim = ":";
Assert(NULL != columnInfo->pathInSchema);
char path[strlen(columnInfo->pathInSchema) + 1];
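/* strtok() modifies the string it tokenizes, so work on a local copy of
* pathInSchema instead of the stored metadata */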
xfer += writeStructBegin(prot);
/*write out type*/
xfer += writeFieldBegin(prot, T_I32, 1);
xfer += writeI32(prot, columnInfo->type);
/*write out encoding*/
xfer += writeFieldBegin(prot, T_LIST, 2);
xfer += writeListBegin(prot, T_I32, columnInfo->EncodingCount);
for (int i = 0; i < columnInfo->EncodingCount; i++) {
xfer += writeI32(prot, (int32_t)(columnInfo->pEncodings[i]));
}
/*write out path_in_schema*/
xfer += writeFieldBegin(prot, T_LIST, 3);
xfer += writeListBegin(prot, T_STRING, columnInfo->depth);
strcpy(path, columnInfo->pathInSchema);
elemPath = strtok(path, delim);
if (elemPath == NULL) {
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR),
errmsg("file metadata column metadata(path_in_schema) not correct")));
}
xfer += writeString(prot, elemPath, strlen(elemPath));
for (int i = 1; i < columnInfo->depth; i++) {
elemPath = strtok(NULL, delim);
if (elemPath == NULL) {
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR),
errmsg("file metadata column metadata(path_in_schema) not correct")));
}
xfer += writeString(prot, elemPath, strlen(elemPath));
}
/*write out codec*/
xfer += writeFieldBegin(prot, T_I32, 4);
xfer += writeI32(prot, (int32_t)columnInfo->codec);
/*write out num of values*/
xfer += writeFieldBegin(prot, T_I64, 5);
xfer += writeI64(prot, (int64_t)columnInfo->valueCount);
/*write total uncompressed size*/
xfer += writeFieldBegin(prot, T_I64, 6);
xfer += writeI64(prot, columnInfo->totalUncompressedSize);
/*write out total compressed size*/
xfer += writeFieldBegin(prot, T_I64, 7);
xfer += writeI64(prot, columnInfo->totalSize);
/*write out key value metadata.*/
/*There is no key value metadata for parquet storage, so there is nothing to write out*/
/*write out data page offset*/
xfer += writeFieldBegin(prot, T_I64, 9);
xfer += writeI64(prot, columnInfo->firstDataPage);
/*write out index page offset and dictionary page offset. No need to write currently*/
/*write out field stop identifier*/
xfer += writeFieldStop(prot);
xfer += writeStructEnd(prot);
return xfer;
}
int
writeColumnChunk(
struct ColumnChunkMetadata_4C *columnInfo,
CompactProtocol *prot)
{
uint32_t xfer = 0;
xfer += writeStructBegin(prot);
/*write out column path*/
if(columnInfo->path != NULL)
{
xfer += writeFieldBegin(prot, T_STRING, 1);
xfer += writeString(prot, columnInfo->path, strlen(columnInfo->path));
}
/*write out file offset*/
xfer += writeFieldBegin(prot, T_I64, 2);
xfer += writeI64(prot, columnInfo->file_offset);
/*write out column metadata*/
xfer += writeFieldBegin(prot, T_STRUCT, 3);
xfer += writeColumnMetadata(columnInfo, prot);
xfer += writeFieldStop(prot);
xfer += writeStructEnd(prot);
return xfer;
}
int
writeRowGroupInfo(
struct BlockMetadata_4C* rowGroupInfo,
CompactProtocol *prot)
{
uint32_t xfer = 0;
xfer += writeStructBegin(prot);
/*write out the column chunk metadata*/
xfer += writeFieldBegin(prot, T_LIST, 1);
xfer += writeListBegin(prot, T_STRUCT, rowGroupInfo->ColChunkCount);
for(int i = 0; i < rowGroupInfo->ColChunkCount; i++){
/*write out each column chunk metadata*/
xfer += writeColumnChunk(&(rowGroupInfo->columns[i]), prot);
}
/*write out total byte size*/
xfer += writeFieldBegin(prot, T_I64, 2);
xfer += writeI64(prot, rowGroupInfo->totalByteSize);
/*write out num_rows*/
xfer += writeFieldBegin(prot, T_I64, 3);
xfer += writeI64(prot, rowGroupInfo->rowCount);
xfer += writeFieldStop(prot);
xfer += writeStructEnd(prot);
return xfer;
}
int
writeSchemaElement_Single(
CompactProtocol *prot,
PrimitiveTypeName *fieldType,
int32_t type_length,
RepetitionType *repetition_type,
char *fieldName,
int32_t num_children)
{
uint32_t xfer = 0;
xfer += writeStructBegin(prot);
/*write out type*/
if(fieldType){
xfer += writeFieldBegin(prot, T_I32, 1);
xfer += writeI32(prot, *(int32_t*)fieldType);
}
/*write out type length*/
if(type_length != 0){
xfer += writeFieldBegin(prot, T_I32, 2);
xfer += writeI32(prot, type_length * 8);
}
/*write out repetition type; the root element is written with repetition_type == NULL, so this field is skipped for it*/
if(repetition_type)
{
xfer += writeFieldBegin(prot, T_I32, 3);
xfer += writeI32(prot, *repetition_type);
}
/*write out name*/
xfer += writeFieldBegin(prot, T_STRING, 4);
xfer += writeString(prot, fieldName, strlen(fieldName));
/*write out number of children*/
if(num_children != 0)
{
xfer += writeFieldBegin(prot, T_I32, 5);
xfer += writeI32(prot, num_children);
}
/*no need to write out the converted type, since there is no converted type
*in the hawq parquet implementation.*/
xfer += writeFieldStop(prot);
xfer += writeStructEnd(prot);
return xfer;
}
int
writeSchemaElement_SingleField(
CompactProtocol *prot,
FileField_4C * field,
int *colIndex)
{
uint32_t xfer = 0;
(*colIndex) = (*colIndex) + 1;
/*if the field has children, write it out and then recurse into its children*/
if(field->num_children > 0){
xfer += writeSchemaElement_Single(prot, NULL, 0, &(field->repetitionType), field->name, field->num_children);
for(int i = 0; i < field->num_children; i++)
{
xfer += writeSchemaElement_SingleField(prot, &(field->children[i]), colIndex);
}
}
else
{
/*if no children, directly write out the field*/
xfer += writeSchemaElement_Single(prot, &(field->type), field->typeLength, &(field->repetitionType),
field->name, field->num_children);
}
return xfer;
}
int
writeSchemaElement(
FileField_4C *pfields,
int fieldCount,
int nodeCount,
CompactProtocol *prot)
{
uint32_t xfer = 0;
/*write out schema element 0*/
xfer += writeSchemaElement_Single(prot, NULL, 0, NULL, "schema", fieldCount);
int colIndex = 0;
for (int i = 0; i < fieldCount; ++i) {
FileField_4C *field = &pfields[i];
xfer += writeSchemaElement_SingleField(prot, field, &colIndex);
}
/*colIndex should equal nodeCount after processing all the fields*/
if (nodeCount != colIndex) {
ereport(ERROR,
(errcode(ERRCODE_GP_INTERNAL_ERROR),
errmsg("file metadata schema element count not correct")));
}
return xfer;
}
/**
* Copy the previous row group metadata: read it from the old footer through the read protocol and write it to the new footer through the write protocol
*/
int
writePreviousRowGroupMetadata(
int rowgroupcnt,
ParquetMetadata parquetMetadata,
CompactProtocol *read_prot,
CompactProtocol *write_prot){
uint32_t xfer = 0;
/*for each row group metadata, read it through the read protocol, and then write it out
*to write protocol*/
for(int i = 0; i < rowgroupcnt; i++)
{
readNextRowGroupInfo(parquetMetadata, read_prot);
xfer += writeRowGroupInfo(parquetMetadata->currentBlockMD, write_prot);
}
return xfer;
}
/**
* Write out the beginning of the parquet file metadata (the part before the row groups), including version,
* schema, and num_rows
*/
int
writePreviousParquetFileMetadata(
ParquetMetadata parquetMetadata,
char *fileName,
File file,
int rowgroupCnt,
CompactProtocol **read_prot)
{
uint32_t xfer = 0;
CompactProtocol *write_prot = (struct CompactProtocol *) palloc0(sizeof(struct CompactProtocol));
initCompactProtocol(write_prot, file, fileName, -1,
PARQUET_FOOTER_BUFFERMODE_WRITE);
xfer += writeStructBegin(write_prot);
/*write out version*/
xfer += writeFieldBegin(write_prot, T_I32, 1);
xfer += writeI32(write_prot, (int32_t)parquetMetadata->version);
/*write out schema. The list written below holds all schemaTreeNodeCount tree nodes plus the root 'schema' element, hence the +1*/
xfer += writeFieldBegin(write_prot, T_LIST, 2);
xfer += writeListBegin(write_prot, T_STRUCT, parquetMetadata->schemaTreeNodeCount + 1);
xfer += writeSchemaElement(parquetMetadata->pfield, parquetMetadata->fieldCount, parquetMetadata->schemaTreeNodeCount, write_prot);
/*write out number of rows*/
xfer += writeFieldBegin(write_prot, T_I64, 3);
xfer += writeI64(write_prot, (int64_t)parquetMetadata->num_rows);
/*write out rowgroup size*/
xfer += writeFieldBegin(write_prot, T_LIST, 4);
xfer += writeListBegin(write_prot, T_STRUCT, parquetMetadata->blockCount);
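/* The list header above declares blockCount row groups in total, but only the
* rowgroupCnt row groups already present in the old footer are copied below;
* the metadata of the newly written row group is presumably appended afterwards
* through the separate serialize protocol (see endSerializeFooter). */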
/*copy the previously written row group metadata from the old footer*/
writePreviousRowGroupMetadata(rowgroupCnt, parquetMetadata, *read_prot, write_prot);
/*append the first part of footer to file*/
xfer = appendFooterBufferTempData(file, write_prot->footerProcessor);
/*free the write protocol for first part of file*/
freeCompactProtocol(write_prot);
pfree(write_prot);
/*if there was previous metadata, finish deserializing the old footer*/
if(rowgroupCnt != 0)
endDeserializerFooter(parquetMetadata, read_prot);
return xfer;
}
int
writeEndofParquetMetadata(
ParquetMetadata parquetMetadata,
CompactProtocol *prot)
{
uint32_t xfer = 0;
/** write out key value metadata */
/*The compact protocol encodes field ids as deltas from the last field id written
*for the current struct. The last FileMetaData field written was the row_groups
*list (id 4), so reset the protocol's last field id to 4 before writing field 5.*/
setLastFieldId(prot, 4);
xfer += writeFieldBegin(prot, T_LIST, 5);
xfer += writeListBegin(prot, T_STRUCT, 1);
xfer += writeStructBegin(prot);
/*write out key*/
xfer += writeFieldBegin(prot, T_STRING, 1);
xfer += writeString(prot, "hawq.schema", strlen("hawq.schema"));
/*write out value*/
xfer += writeFieldBegin(prot, T_STRING, 2);
if (parquetMetadata->hawqschemastr == NULL)
parquetMetadata->hawqschemastr = generateHAWQSchemaStr(parquetMetadata->pfield,
parquetMetadata->fieldCount);
xfer += writeString(prot, parquetMetadata->hawqschemastr, strlen(parquetMetadata->hawqschemastr));
/*write out end of key value*/
xfer += writeFieldStop(prot);
xfer += writeStructEnd(prot);
/*write out the file metadata field end identifier*/
xfer += writeFieldStop(prot);
xfer += writeStructEnd(prot);
return xfer;
}
/*
* Initialize serializing the footer to file
*/
void
initSerializeFooter(
CompactProtocol **footerProtocol,
char *fileName){
*footerProtocol = (struct CompactProtocol *)
palloc0(sizeof(struct CompactProtocol));
initCompactProtocol(*footerProtocol, -1, fileName, -1, PARQUET_FOOTER_BUFFERMODE_WRITE);
}
/* End serializing the footer: copy the buffered footer data from the temporary store to the data file*/
int
endSerializeFooter(
CompactProtocol **read_prot,
CompactProtocol **write_prot,
char *fileName,
File file,
ParquetMetadata parquetMetadata,
int rowgroup_cnt)
{
int footerLength = 0;
/*firstly write out previous parquet file metadata*/
footerLength += writePreviousParquetFileMetadata(parquetMetadata, fileName,
file, rowgroup_cnt, read_prot);
/*then write out the newly added rowgroup metadata and the remaining keyvalue part*/
writeEndofParquetMetadata(parquetMetadata, *write_prot);
footerLength += appendFooterBufferTempData(file, (*write_prot)->footerProcessor);
/*free the compact write protocol*/
freeCompactProtocol(*write_prot);
pfree(*write_prot);
return footerLength;
}
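/*
* Sketch of the footer rewrite sequence as suggested by the functions above
* (the actual callers live outside this file, so treat this as illustrative only):
*
*   initDeserializeFooter(file, footerLen, fileName, &md, &read_prot);   // md pre-allocated by the caller
*   initSerializeFooter(&write_prot, fileName);
*   ... write the new row group metadata through write_prot ...
*   newLen = endSerializeFooter(&read_prot, &write_prot, fileName, file, md, oldRowGroupCnt);
*/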
/**
* pfree the current blockMetadata
*/
void
freeRowGroupInfo(struct BlockMetadata_4C *blockMetadata) {
/* pfree column chunk information*/
for (int j = 0; j < blockMetadata->ColChunkCount; j++) {
struct ColumnChunkMetadata_4C *colChunk = &(blockMetadata->columns[j]);
if (colChunk->path != NULL)
pfree(colChunk->path);
if (colChunk->colName != NULL)
pfree(colChunk->colName);
if (colChunk->pathInSchema != NULL)
pfree(colChunk->pathInSchema);
if (colChunk->pEncodings != NULL)
pfree(colChunk->pEncodings);
}
pfree(blockMetadata->columns);
blockMetadata->columns = NULL;
pfree(blockMetadata);
}
/**
* Free parquet metadata
*/
void freeParquetMetadata(ParquetMetadata parquetMetadata) {
/* pfree field information*/
for (int i = 0; i < parquetMetadata->fieldCount; i++) {
freeField(&(parquetMetadata->pfield[i]));
}
pfree(parquetMetadata->pfield);
/* pfree hawqschemastr*/
if (parquetMetadata->hawqschemastr != NULL)
pfree(parquetMetadata->hawqschemastr);
if(parquetMetadata->currentBlockMD != NULL)
{
freeRowGroupInfo(parquetMetadata->currentBlockMD);
}
if (parquetMetadata->estimateChunkSizes != NULL)
{
pfree(parquetMetadata->estimateChunkSizes);
}
pfree(parquetMetadata);
}
/**
* Free field information
*/
void
freeField(struct FileField_4C *field) {
if (field->name != NULL)
pfree(field->name);
if (field->pathInSchema != NULL)
pfree(field->pathInSchema);
if (field->num_children > 0){
for (int i = 0; i < field->num_children; i++) {
freeField(&(field->children[i]));
}
pfree(field->children);
}
}