/* src/backend/access/external/pxfanalyze.c - hawq - Git at Google */

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 /*-------------------------------------------------------------------------
  *
  * pxfanalyze.c
  *	  Helper functions to perform ANALYZE on PXF tables.
  *-------------------------------------------------------------------------
  */

 #include "postgres.h"
 #include <curl/curl.h>
 #include <json-c/json.h>
 #include "access/hd_work_mgr.h"
 #include "access/pxfanalyze.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_exttable.h"
 #include "cdb/cdbanalyze.h"
 #include "commands/analyzeutils.h"
 #include "lib/stringinfo.h"
 #include "nodes/makefuncs.h"
 #include "utils/builtins.h"
 #include "utils/elog.h"
 #include "utils/guc.h"
 #include "utils/lsyscache.h"


 /*
  * Forward declarations of the file-local (static) helpers defined
  * further down in this file.
  */

 /* creation of the temporary PXF copy and of the sample table */
 static void buildPxfTableCopy(Oid relationOid,
 		float4	samplingRatio,
 		int pxfStatMaxFragments,
 		const char* schemaName, const char* tableName,
 		const char* sampleSchemaName, const char* pxfSampleTable);
 static void buildSampleFromPxf(const char* sampleSchemaName,
 		const char* sampleTableName,
 		const char* pxfSampleTable,
 		List *lAttributeNames,
 		float4 *sampleTableRelTuples);

 static float4 calculateSamplingRatio(float4 relTuples,
 									 float4 relFrags,
 									 float4 requestedSampleSize);

 /* helpers used to build the CREATE EXTERNAL TABLE statement */
 static char* parseFormat(char fmtcode);
 static char* escape_unprintables(const char *src);
 static char* escape_fmtopts_string(const char *src);
 static char* custom_fmtopts_string(const char *src);
 static void printExtTable(Oid relationOid, ExtTableEntry* extTable);
 static char* createPxfSampleStmt(Oid relationOid,
 		const char* schemaName, const char* tableName,
 		const char* sampleSchemaName, const char* pxfSampleTable,
 		float4 pxf_sample_ratio, int pxf_max_fragments);
 /* helpers used for the reltuples/relpages estimate */
 static float4 getPxfFragmentTupleCount(Oid relationOid);
 static float4 countFirstFragmentTuples(const char* schemaName,
 									   const char* tableName);
 static void getFragmentStats(Relation rel, StringInfo location,
 							 float4 *numfrags, float4 *firstfragsize,
 							 float4 *totalsize);


 /*
  * Estimates reltuples and relpages for a PXF table.
  *
  * The fragment statistics (number of fragments, size of the first fragment
  * and total size) are fetched with getFragmentStats(), the tuples of the
  * first fragment are counted with getPxfFragmentTupleCount(), and the tuple
  * count is extrapolated from the first fragment to the total size.
  *
  * Input:
  * 	relation			- PXF relation being analyzed
  * 	location			- location string handed to getFragmentStats()
  * 	estimatedRelTuples	- previous tuple estimate (only written to the log)
  * Output:
  * 	estimatedRelTuples	- estimated number of tuples
  * 	estimatedRelPages	- number of fragments, reported as the page count
  */
 void analyzePxfEstimateReltuplesRelpages(Relation relation,
 		StringInfo location,
 		float4* estimatedRelTuples,
 		float4* estimatedRelPages)
 {

 	float4 numFrags = 0.0;
 	float4 firstFragSize = 0.0;
 	float4 totalSize = 0.0;

 	float4 firstFragTuples = 0.0;
 	float4 estimatedTuples = 0.0;

 	/* get number of fragments, size of first fragment and total size.
 	 * This is used together with the number of tuples in first fragment
 	 * to estimate the number of tuples in the table. */
 	getFragmentStats(relation, location, &numFrags, &firstFragSize, &totalSize);

 	/* get number of tuples from first fragment */
 	firstFragTuples = getPxfFragmentTupleCount(relation->rd_id);

 	/* calculate estimated tuple count */
 	if (firstFragTuples > 0)
 	{
 		Assert(firstFragSize > 0);
 		Assert(totalSize > 0);
 		/* The calculation:
 		 * size of each tuple = first fragment size / first fragment row
 		 * total size = size of each tuple * number of tuples
 		 * number of tuples = total size / size of each tuple
 		 *
 		 * The Asserts above are compiled out of non-assert builds, so the
 		 * divisor is checked at run time as well: dividing by a zero
 		 * fragment size would store inf/NaN as the estimate.
 		 */
 		if (firstFragSize > 0)
 		{
 			estimatedTuples = (totalSize / firstFragSize) * firstFragTuples;
 		}
 	}

 	elog(DEBUG2, "Estimated tuples for PXF table: %f. (first fragment count %f, fragments number %f, old estimate %f)",
 		 estimatedTuples, firstFragTuples, numFrags, *estimatedRelTuples);

 	*estimatedRelTuples = estimatedTuples;
 	*estimatedRelPages = numFrags;

 	/* relpages can't be 0 if there are tuples in the table. */
 	if ((*estimatedRelPages < 1.0) && (estimatedTuples > 0))
 	{
 		*estimatedRelPages = 1.0;
 	}

 	/* in case there were problems with the PXF service, keep the defaults */
 	if (*estimatedRelPages < 0)
 	{
 		*estimatedRelPages =  gp_external_table_default_number_of_pages;
 	}
 	if (*estimatedRelTuples < 0)
 	{
 		*estimatedRelTuples =  gp_external_table_default_number_of_tuples;
 	}
 }

 /*
  * Creates a sample table with data from a PXF table.
  * We need to create a copy of the PXF table, in order to pass the sampling
  * parameters (sampling ratio and maximum number of fragments) as attributes
  * of its location.
  *
  * The new PXF table is sampled and the results are saved in the returned sample table.
  * Note that ANALYZE can be executed only by the database owner.
  * It is safe to assume that the database owner has permissions to create temp tables.
  * The sampling is done by uniformly sampling a ratio of the records of each fragment,
  * up to pxf_stat_max_fragments fragments.
  *
  * The temporary PXF copy is dropped before returning; the sample table is
  * left in place and its Oid is returned.
  *
  * Input:
  * 	relationOid 	- relation to be sampled
  * 	sampleTableName - sample table name, moderately unique
  * 	lAttributeNames - attributes to be included in the sample
  * 	relTuples		- estimated size of relation
  * 	relFrags		- estimated number of fragments in relation
  * 	requestedSampleSize - as determined by attribute statistics requirements.
  * Output:
  * 	sampleTableRelTuples - number of tuples in the sample table created.
  *
  * Raises ERROR when the sample table ends up with less than one tuple.
  */
 Oid buildPxfSampleTable(Oid relationOid,
 		char* sampleTableName,
 		List *lAttributeNames,
 		float4	relTuples,
 		float4  relFrags,
 		float4 	requestedSampleSize,
 		float4 *sampleTableRelTuples)
 {
 	const char *schemaName = get_namespace_name(get_rel_namespace(relationOid)); /* must be pfreed */
 	const char *tableName = get_rel_name(relationOid); /* must be pfreed */
 	char	*sampleSchemaName = pstrdup("pg_temp");
 	char	*pxfSampleTable = temporarySampleTableName(relationOid, "pg_analyze_pxf"); /* must be pfreed */
 	Oid			sampleTableOid = InvalidOid;
 	Oid			pxfSampleTableOid = InvalidOid;
 	RangeVar 	*rangeVar = NULL;
 	float4 pxfSamplingRatio = 0.0;

 	Assert(requestedSampleSize > 0.0);
 	Assert(relTuples > 0.0);
 	Assert(relFrags > 0.0);

 	/* calculate pxf_sample_ratio */
 	pxfSamplingRatio = calculateSamplingRatio(relTuples, relFrags, requestedSampleSize);

 	/* build copy of original pxf table */
 	buildPxfTableCopy(relationOid,
 					  pxfSamplingRatio,
 					  pxf_stat_max_fragments,
 					  schemaName, tableName,
 					  sampleSchemaName, pxfSampleTable);

 	rangeVar = makeRangeVar(NULL /*catalogname*/, sampleSchemaName, pxfSampleTable, -1);
 	pxfSampleTableOid = RangeVarGetRelid(rangeVar, true /* failOK */, false /*allowHcatalog*/);

 	/*
 	 * The lookup is done. Release this node now, because the variable is
 	 * reused for the sample table below and only that second node would be
 	 * freed at the end.
 	 */
 	pfree((void *) rangeVar);
 	rangeVar = NULL;

 	buildSampleFromPxf(sampleSchemaName, sampleTableName, pxfSampleTable,
 					   lAttributeNames, sampleTableRelTuples);

 	rangeVar = makeRangeVar(NULL /*catalogname*/, sampleSchemaName, sampleTableName, -1);
 	sampleTableOid = RangeVarGetRelid(rangeVar, true /* failOK */, false /*allowHcatalog*/);

 	Assert(sampleTableOid != InvalidOid);

 	/**
 	 * MPP-10723: Very rarely, we may be unlucky and generate an empty sample table. We error out in this case rather than
 	 * generate bad statistics.
 	 */

 	if (*sampleTableRelTuples < 1.0)
 	{
 		elog(ERROR, "ANALYZE unable to generate accurate statistics on table %s.%s. Try lowering gp_analyze_relative_error",
 				quote_identifier(schemaName),
 				quote_identifier(tableName));
 	}

 	/* the PXF copy was only needed to populate the sample table */
 	if (pxfSampleTableOid != InvalidOid)
 	{
 		elog(DEBUG2, "ANALYZE dropping PXF sample table");
 		dropSampleTable(pxfSampleTableOid, true);
 	}

 	pfree((void *) rangeVar);
 	pfree((void *) pxfSampleTable);
 	pfree((void *) tableName);
 	pfree((void *) schemaName);
 	pfree((void *) sampleSchemaName);
 	return sampleTableOid;
 }

 /*
  * Creates an external PXF table that copies the given PXF table.
  *
  * The CREATE statement is produced by createPxfSampleStmt(), which receives
  * the sampling ratio and the maximum number of fragments, and is then
  * executed through SPI.
  *
  * Input:
  * 	relationOid 		- PXF relation to copy
  * 	samplingRatio		- sampling ratio handed to createPxfSampleStmt()
  * 	pxfStatMaxFragments	- fragment limit handed to createPxfSampleStmt()
  * 	schemaName/tableName	- name of the original table
  * 	sampleSchemaName/pxfSampleTable - name of the table to create
  */
 static void buildPxfTableCopy(Oid relationOid,
 		float4 samplingRatio,
 		int pxfStatMaxFragments,
 		const char* schemaName, const char* tableName,
 		const char* sampleSchemaName, const char* pxfSampleTable)
 {
 	char *createStmt = NULL;

 	/* compose the statement text for the copy */
 	createStmt = createPxfSampleStmt(relationOid,
 									 schemaName, tableName,
 									 sampleSchemaName, pxfSampleTable,
 									 samplingRatio, pxfStatMaxFragments);

 	/* execute it; no result callback is needed for a CREATE */
 	spiExecuteWithCallback(createStmt,
 						   false /*readonly*/,
 						   0 /*tcount */,
 						   NULL,
 						   NULL);

 	/* the statement text is owned by this function */
 	pfree(createStmt);

 	elog(DEBUG2, "Created PXF table %s.%s for sampling PXF table %s.%s",
 		 quote_identifier(sampleSchemaName),
 		 quote_identifier(pxfSampleTable),
 		 quote_identifier(schemaName),
 		 quote_identifier(tableName));
 }

 /*
  * Creates and populates a sample table for a PXF table.
  * The table that is actually queried is pxfSampleTable (the copy of the
  * original PXF table), not the original table itself.
  *
  * The statement has the form
  *   create table <schema>.<sample> as (select Ta.<attr>, ... from
  *   <schema>.<pxf copy> as Ta) distributed randomly
  *
  * Input:
  * 	sampleSchemaName	- schema of both the sample table and the PXF copy
  * 	sampleTableName		- name of the sample table to create
  * 	pxfSampleTable		- name of the PXF copy to read from
  * 	lAttributeNames		- names of the attributes to select
  * Output:
  * 	sampleTableRelTuples - filled in by the SPI callback
  * 	                       spiCallback_getProcessedAsFloat4
  */
 static void buildSampleFromPxf(const char* sampleSchemaName,
 		const char* sampleTableName,
 		const char* pxfSampleTable,
 		List *lAttributeNames,
 		float4 *sampleTableRelTuples)
 {
 	StringInfoData query;
 	ListCell *cell = NULL;
 	int attrIdx = 0;
 	int lastIdx = list_length(lAttributeNames) - 1;

 	initStringInfo(&query);

 	appendStringInfo(&query, "create table %s.%s as (select ",
 					 quote_identifier(sampleSchemaName),
 					 quote_identifier(sampleTableName));

 	/* every attribute but the last is followed by ", ", the last by " " */
 	foreach_with_count(cell, lAttributeNames, attrIdx)
 	{
 		const char *separator = (attrIdx < lastIdx) ? ", " : " ";

 		appendStringInfo(&query, "Ta.%s%s",
 						 quote_identifier((const char *) lfirst(cell)),
 						 separator);
 	}

 	appendStringInfo(&query, "from %s.%s as Ta) distributed randomly",
 					 quote_identifier(sampleSchemaName),
 					 quote_identifier(pxfSampleTable));

 	/* in case of PXF error, analyze on this table will be reverted */
 	spiExecuteWithCallback(query.data,
 						   false /*readonly*/,
 						   0 /*tcount */,
 						   spiCallback_getProcessedAsFloat4,
 						   sampleTableRelTuples);

 	pfree(query.data);

 	elog(DEBUG2, "Created sample table %s.%s with nrows=%.0f",
 		 quote_identifier(sampleSchemaName),
 		 quote_identifier(sampleTableName),
 		 *sampleTableRelTuples);
 }

 /*
  * Returns a sampling ratio - a fraction between 1.0 and 0.0001
  * representing how many samples should be returned from each fragment
  * of a PXF table.
  * The ratio is calculated based on the tuples estimate of the table
  * and on the number of the actually sampled fragments
  * (GUC pxf_stat_max_fragments), by the following formula:
  * ratio = (<sample size> / <tuples estimate>) * (<total # fragments> / <fragments to be sampled>)
  * The fragments factor is applied only when the table has more fragments
  * than pxf_stat_max_fragments.
  * If the ratio is too big or small, it is corrected to 1.0 or 0.0001 respectively.
  *
  * Input:
  * 	relTuples		- number of tuples in the table
  * 	relFrags		- number of fragments in the table
  * 	requestedSampleSize - number of sample tuples required
  * Output:
  * 	the sampling ratio for the table.
  */
 static float4 calculateSamplingRatio(float4 relTuples,
 		 float4 relFrags,
 		 float4 requestedSampleSize)
 {
 	float4 sampleRatio = 0.0;

 	Assert(relFrags > 0);
 	Assert(relTuples > 0);
 	Assert(requestedSampleSize > 0);

 	/* sample ratio for regular tables */
 	sampleRatio = requestedSampleSize / relTuples;

 	if (pxf_stat_max_fragments < relFrags)
 	{
 		/*
 		 * Correct ratio according to the number of sampled fragments.
 		 * If there are less fragments to sample, the ratio should be increased.
 		 */
 		sampleRatio = sampleRatio * (relFrags / pxf_stat_max_fragments);
 	}

 	/*
 	 * If the sampling ratio is > 100%, make it 100%.
 	 * This is checked regardless of the fragments correction above: the
 	 * plain ratio already exceeds 1.0 whenever the requested sample is
 	 * larger than the tuples estimate.
 	 */
 	if (sampleRatio > 1.0)
 	{
 		sampleRatio = 1.0;
 	}

 	/*
 	 * If the ratio is too low (< 0.0001), correct it to 0.0001.
 	 * That means that the lowest rate we will get is 1 tuple per 10,000.
 	 */
 	if (sampleRatio < 0.0001)
 	{
 		sampleRatio = 0.0001;
 	}

 	elog(DEBUG2, "PXF ANALYZE: pxf_stats_sample_ratio = %f, pxf_stats_max_fragments = %d, table fragments = %f",
 		 sampleRatio, pxf_stat_max_fragments, relFrags);
 	return sampleRatio;
 }

 /*
  * Maps an external table format code to a printable name:
  * "CUSTOM", "TEXT" or "CSV", tested in that order.
  * Raises ERROR for any other code.
  */
 static char* parseFormat(char fmtcode)
 {
 	char *formatName = NULL;

 	if (fmttype_is_custom(fmtcode))
 	{
 		formatName = "CUSTOM";
 	}
 	else if (fmttype_is_text(fmtcode))
 	{
 		formatName = "TEXT";
 	}
 	else if (fmttype_is_csv(fmtcode))
 	{
 		formatName = "CSV";
 	}
 	else
 	{
 		elog(ERROR, "Unrecognized external table format '%c'", fmtcode);
 	}

 	return formatName;
 }

 /* Helper functions from dumputils.c, modified to backend (malloc->palloc) */

 /*
  * Escape any unprintables (0x00 - 0x1F) in given string, except for
  * TAB (0x09) which is copied as is.
  * Every escaped byte is replaced by the 4 characters \xHH, so the result
  * buffer is sized for 4 output bytes per input byte.
  * All other bytes, including bytes >= 0x80, are copied unchanged.
  *
  * Returns a palloc'd, NUL terminated string.
  */
 static char *
 escape_unprintables(const char *src)
 {
 	size_t		len = strlen(src);
 	size_t		i;
 	size_t		j = 0;
 	char	   *result = palloc0(len * 4 + 1);

 	if (!result)
 		return NULL; /* out of memory */

 	for (i = 0; i < len; i++)
 	{
 		/*
 		 * Inspect the byte as unsigned. With a signed plain char, bytes
 		 * >= 0x80 are negative: they would pass the "<= 0x1F" test and be
 		 * printed sign-extended, which snprintf then truncates to "\xFF".
 		 */
 		unsigned char ch = (unsigned char) src[i];

 		if ((ch <= 0x1F) && (ch != 0x09 /* TAB */))
 		{
 			/* 4 characters plus the terminating NUL */
 			snprintf(&(result[j]), 5, "\\x%02X", (unsigned int) ch);
 			j += 4;
 		}
 		else
 			result[j++] = src[i];
 	}
 	result[j] = '\0';
 	return result;
 }

 /*
  * Escape backslashes and apostrophes in EXTERNAL TABLE format strings.
  *
  * The fmtopts field of a pg_exttable tuple has an odd encoding -- it is
  * partially parsed and contains "string" values that aren't legal SQL.
  * Each string value is delimited by apostrophes and is usually, but not
  * always, a single character.  The fmtopts field is typically something
  * like {delimiter '\x09' null '\N' escape '\'} or
  * {delimiter ',' null '' escape '\' quote '''}.
  *
  * What this function emits:
  *  - an 'E' in front of every opening apostrophe ("escape syntax" string)
  *  - a backslash in front of every backslash
  *  - a backslash in front of every apostrophe inside a string; an
  *    apostrophe that ends the source or is followed by a space is taken
  *    as the closing apostrophe and is copied as is
  *
  * Usage note: A field value containing an apostrophe followed by a space
  * will throw this algorithm off -- it presumes no embedded spaces.
  *
  * Returns a palloc'd string; at most two bytes are written per input byte.
  */
 static char* escape_fmtopts_string(const char *src)
 {
 	int			srclen = strlen(src);
 	char	   *escaped = palloc0(srclen * 2 + 1);
 	char	   *out = escaped;
 	bool		quoted = false;
 	int			pos;

 	for (pos = 0; pos < srclen; pos++)
 	{
 		char		ch = src[pos];

 		if (ch == '\\')
 		{
 			/* double every backslash */
 			*out++ = '\\';
 		}
 		else if (ch == '\'')
 		{
 			if (!quoted)
 			{
 				/* opening apostrophe: prepend the escape-syntax marker */
 				*out++ = 'E';
 				quoted = true;
 			}
 			else if ((pos + 1) == srclen || src[pos + 1] == ' ')
 			{
 				/* closing apostrophe: copied below without escaping */
 				quoted = false;
 			}
 			else
 			{
 				/* apostrophe within the string value */
 				*out++ = '\\';
 			}
 		}

 		/* the source character itself is always copied */
 		*out++ = ch;
 	}

 	*out = '\0';
 	return escaped;
 }

 /*
  * Tokenize a fmtopts string (for use with 'custom' formatters)
  * i.e. convert it to: a = b, format.
  * (e.g.:  formatter E'fixedwidth_in null E' ' preserve_blanks E'on')
  *
  * Every "name E'value'" pair of the input is emitted as "name = 'value'"
  * (the E prefix is dropped), and a ',' is emitted between pairs.
  * Scanning stops at the first name that is not followed by a space or at
  * the first value without a closing apostrophe; a trailing ',' or '=' is
  * removed from the result.
  *
  * Returns a palloc'd string, or NULL when src is NULL.
  */
 static char* custom_fmtopts_string(const char *src)
 {
 	size_t		len;
 	size_t		outlen;
 	char	   *result;
 	char	   *srcdup;
 	char	   *cursor;
 	char	   *find_res = NULL;

 	/* check the input before allocating anything, so nothing is leaked */
 	if (!src)
 		return NULL;

 	len = strlen(src);

 	/*
 	 * Every input byte produces at most one output byte, except for the
 	 * space after a name which becomes " = " (3 bytes). Three bytes per
 	 * input byte is therefore always enough, also for degenerate input
 	 * such as a single space.
 	 */
 	result = palloc0(len * 3 + 1);
 	srcdup = pstrdup(src);
 	cursor = srcdup;

 	for (;;)
 	{
 		/* find first word (a) */
 		find_res = strchr(cursor, ' ');
 		if (!find_res)
 			break;
 		strncat(result, cursor, (find_res - cursor));
 		/* skip space */
 		cursor = find_res + 1;
 		/* remove E if E' */
 		if ((strlen(cursor) > 2) && (cursor[0] == 'E') && (cursor[1] == '\''))
 			cursor++;
 		/* add " = " */
 		strncat(result, " = ", 3);
 		/*
 		 * Nothing follows the name: there is no value to copy, and
 		 * searching from cursor + 1 would read past the terminator.
 		 */
 		if (*cursor == '\0')
 			break;
 		/* find second word (b) until second '
 		   find \' combinations and ignore them */
 		find_res = strchr(cursor + 1, '\'');
 		while (find_res && (*(find_res - 1) == '\\') /* ignore \' */)
 		{
 			find_res = strchr(find_res + 1, '\'');
 		}
 		if (!find_res)
 			break;
 		strncat(result, cursor, (find_res - cursor + 1));
 		cursor = find_res + 1;
 		/* skip space and add ',' */
 		if (cursor[0] == ' ')
 		{
 			cursor++;
 			strncat(result, ",", 1);
 		}
 	}

 	/*
 	 * fix string - remove trailing ',' or '='.
 	 * The result may be empty (e.g. input without any space), so the last
 	 * character is only inspected when there is one.
 	 */
 	outlen = strlen(result);
 	if (outlen > 0 &&
 		(result[outlen - 1] == ',' || result[outlen - 1] == '='))
 		result[outlen - 1] = '\0';

 	pfree(srcdup);
 	return result;
 }

 /*
  * Writes the fields of an external table entry to the log at DEBUG2.
  * Does nothing when extTable is NULL.
  * A NULL command is printed as "NULL" and a reject limit type of -1 is
  * printed as 'n'.
  */
 static void printExtTable(Oid relationOid, ExtTableEntry* extTable)
 {
 	if (extTable != NULL)
 	{
 		elog(DEBUG2, "extTable params: oid: %d command: %s, encoding: %d, "
 			 "format: %c (%s), error table oid: %d, format options: %s, "
 			 "is web: %d, is writable: %d, locations size: %d, "
 			 "reject limit: %d, reject limit type: %c",
 			 relationOid,
 			 (extTable->command) ? extTable->command : "NULL",
 			 extTable->encoding,
 			 extTable->fmtcode,
 			 parseFormat(extTable->fmtcode),
 			 extTable->fmterrtbl,
 			 extTable->fmtopts,
 			 extTable->isweb,
 			 extTable->iswritable,
 			 list_length(extTable->locations),
 			 extTable->rejectlimit,
 			 (extTable->rejectlimittype == -1) ? 'n' : extTable->rejectlimittype);
 	}
 }

 /*
  * This method returns an SQL command to create a PXF table
  * which is a copy of a given PXF table relationOid, with the following changes:
  * - PXF sample table name is pg_temp.pg_analyze_pxf_<relationOid>
  * - LOCATION part is appended 2 attributes - pxf_sample_ratio, pxf_max_fragments.
  * - in case of error table - SEGMENT REJECT LIMIT 25 PERCENT
  *
  * Input:
  * 	relationOid 		- relation to be copied
  * 	schemaName 			- schema name of original table
  * 	tableName			- table name of original table
  * 	sampleSchemaName	- schema name of new table
  * 	pxfSampleTable		- table name or new table
  * 	pxf_sample_ratio	- ratio of samplings to be done per fragment
  * 	pxf_max_fragments	- max number of fragments to be sampled
  * Output:
  * 	SQL statement string for creating the new table
  */
 static char* createPxfSampleStmt(Oid relationOid,
 		const char* schemaName, const char* tableName,
 		const char* sampleSchemaName, const char* pxfSampleTable,
 		float4 pxf_sample_ratio, int pxf_max_fragments)
 {
 	ExtTableEntry *extTable = GetExtTableEntry(relationOid);
 	StringInfoData str;
 	initStringInfo(&str);
 	char* location = NULL;
 	char* tmpstring = NULL;
 	char* escapedfmt = NULL;
 	char* tabfmt = NULL;
 	char* customfmt = NULL;

 	printExtTable(relationOid, extTable);

 	location = escape_unprintables(((Value*)list_nth(extTable->locations, 0))->val.str /*pxfLocation*/);

 	appendStringInfo(&str, "CREATE EXTERNAL TABLE %s.%s (LIKE %s.%s) "
 			"LOCATION(E'%s&STATS-SAMPLE-RATIO=%.4f&STATS-MAX-FRAGMENTS=%d') ",
 			quote_identifier(sampleSchemaName),
 			quote_identifier(pxfSampleTable),
 			quote_identifier(schemaName),
 			quote_identifier(tableName),
 			location,
 			pxf_sample_ratio,
 			pxf_max_fragments);

 	pfree(location);

 	/* add FORMAT clause */
 	escapedfmt = escape_fmtopts_string((const char *) extTable->fmtopts);
 	tmpstring = escape_unprintables((const char *) escapedfmt);
 	pfree(escapedfmt);
 	escapedfmt = NULL;

 	switch (extTable->fmtcode)
 	{
 	case 't':
 		tabfmt = "text";
 		break;
 	case 'b':
 		/*
 		 * b denotes that a custom format is used.
 		 * the fmtopts string should be formatted as:
 		 * a1 = 'val1',...,an = 'valn'
 		 *
 		 */
 		tabfmt = "custom";
 		customfmt = custom_fmtopts_string(tmpstring);
 		break;
 	default:
 		tabfmt = "csv";
 	}
 	appendStringInfo(&str, "FORMAT '%s' (%s) ",
 			tabfmt,
 			customfmt ? customfmt : tmpstring);
 	pfree(tmpstring);
 	tmpstring = NULL;
 	if (customfmt)
 	{
 		pfree(customfmt);
 		customfmt = NULL;
 	}
 	/* add ENCODING clause */
 	appendStringInfo(&str, "ENCODING '%s' ", pg_encoding_to_char(extTable->encoding));

 	/* add error control clause */
 	if (extTable->rejectlimit != -1)
 	{
 		appendStringInfo(&str, "%s", "SEGMENT REJECT LIMIT 25 PERCENT ");
 	}

 	elog(DEBUG2, "createPxfSampleStmt SQL statement: %s", str.data);

 	return str.data;
 }

 /*
  * Returns the number of tuples in the first fragment of given
  * PXF table.
  * This is done by creating a copy of the PXF table, with additional parameters
  * limiting the query to the first fragment only (max fragments = 1, sample ratio = 1.0),
  * and running a COUNT query on it.
  * The copy is dropped again before the tuple count result is returned.
  *
  * Raises ERROR when the copy cannot be found after it was created.
  *
  * Input:
  * 	relationOid 	- relation to be sampled
  */
 static float4 getPxfFragmentTupleCount(Oid relationOid)
 {
 	const char *schemaName = get_namespace_name(get_rel_namespace(relationOid)); /* must be pfreed */
 	const char *tableName = get_rel_name(relationOid); /* must be pfreed */
 	char	*sampleSchemaName = pstrdup("pg_temp");
 	char	*pxfEstimateTable = temporarySampleTableName(relationOid, "pg_analyze_pxf_est"); /* must be pfreed */
 	Oid			pxfEstimateTableOid = InvalidOid;
 	RangeVar 	*rangeVar = NULL;
 	float4	ntuples = -1.0;

 	/* build copy of original pxf table */
 	buildPxfTableCopy(relationOid,
 					  1.0, /* get all tuples */
 					  1, /* query only first fragment */
 					  schemaName, tableName,
 					  sampleSchemaName, pxfEstimateTable);

 	rangeVar = makeRangeVar(NULL /*catalogname*/, sampleSchemaName, pxfEstimateTable, -1);
 	pxfEstimateTableOid = RangeVarGetRelid(rangeVar, true /* failOK */, false /*allowHcatalog*/);

 	if (pxfEstimateTableOid == InvalidOid)
 	{
 		elog(ERROR, "Unable to create a copy of PXF table %s.%s", schemaName, tableName);
 	}

 	/* run count query */
 	ntuples = countFirstFragmentTuples(sampleSchemaName, pxfEstimateTable);

 	/* check the Oid of the copy, not the pointer to its name */
 	Assert(pxfEstimateTableOid != InvalidOid);

 	elog(DEBUG2, "ANALYZE dropping PXF estimate table %s.%s (%d)",
 		 sampleSchemaName, pxfEstimateTable, pxfEstimateTableOid);
 	dropSampleTable(pxfEstimateTableOid, true);

 	pfree((void *) rangeVar);
 	pfree((void *) pxfEstimateTable);
 	pfree((void *) tableName);
 	pfree((void *) schemaName);
 	pfree((void *) sampleSchemaName);

 	return ntuples;
 }

 /*
  * Runs "select count(*)::float4" on the given table through SPI and
  * returns the value stored by the SPI callback
  * spiCallback_getSingleResultRowColumnAsFloat4.
  * The result variable starts out as -1.0.
  *
  * Input:
  * 	schemaName	- schema of the table to count
  * 	tableName	- name of the table to count
  */
 static float4 countFirstFragmentTuples(const char* schemaName,
 									   const char* tableName)
 {
 	StringInfoData countQuery;
 	float firstFragCount = -1.0;

 	initStringInfo(&countQuery);
 	appendStringInfo(&countQuery, "select count(*)::float4 from %s.%s",
 					 quote_identifier(schemaName),
 					 quote_identifier(tableName));

 	/* in case of PXF error, analyze on this table will be reverted */
 	spiExecuteWithCallback(countQuery.data,
 						   false /*readonly*/,
 						   0 /*tcount */,
 						   spiCallback_getSingleResultRowColumnAsFloat4,
 						   &firstFragCount);

 	pfree(countQuery.data);

 	elog(DEBUG3, "count() of first pxf fragment gives %f values.", firstFragCount);

 	return firstFragCount;
 }

 /* --------------------------------
  *		getFragmentStats  -
  *
  *		Fetch number of fragments, size of first fragment and total size
  *		of the datasource of a PXF external table, as returned by
  *		get_pxf_fragments_statistics() for the given location.
  *
  *		The three values are stored in numfrags, firstfragsize and
  *		totalsize. Raises ERROR when no statistics element is returned.
  * --------------------------------
  */
 static void getFragmentStats(Relation rel, StringInfo location,
 							 float4 *numfrags, float4 *firstfragsize,
 							 float4 *totalsize)
 {
 	PxfFragmentStatsElem *stats = get_pxf_fragments_statistics(location->data, rel);

 	/*
 	 * A NULL result probably means a communication error; there is
 	 * nothing to estimate from, so error out.
 	 */
 	if (stats == NULL)
 	{
 		elog(ERROR, "No statistics were returned for relation %s", RelationGetRelationName(rel));
 	}

 	/* hand the values to the caller, then release the element */
 	*numfrags = stats->numFrags;
 	*firstfragsize = stats->firstFragSize;
 	*totalsize = stats->totalSize;
 	pfree(stats);

 	elog(DEBUG2, "ANALYZE estimate for PXF table %s: fragments %f, first frag size %f, "
 		 "total size %f [max int %d]",
 		 RelationGetRelationName(rel),
 		 *numfrags,
 		 *firstfragsize,
 		 *totalsize,
 		 INT_MAX);
 }