| /* | 
 |  * Licensed to the Apache Software Foundation (ASF) under one | 
 |  * or more contributor license agreements.  See the NOTICE file | 
 |  * distributed with this work for additional information | 
 |  * regarding copyright ownership.  The ASF licenses this file | 
 |  * to you under the Apache License, Version 2.0 (the | 
 |  * "License"); you may not use this file except in compliance | 
 |  * with the License.  You may obtain a copy of the License at | 
 |  * | 
 |  *   http://www.apache.org/licenses/LICENSE-2.0 | 
 |  * | 
 |  * Unless required by applicable law or agreed to in writing, | 
 |  * software distributed under the License is distributed on an | 
 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
 |  * KIND, either express or implied.  See the License for the | 
 |  * specific language governing permissions and limitations | 
 |  * under the License. | 
 |  */ | 
 | /*------------------------------------------------------------------------- | 
 |  * | 
 |  * pxfanalyze.c | 
 |  *	  Helper functions to perform ANALYZE on PXF tables. | 
 |  *------------------------------------------------------------------------- | 
 |  */ | 
 |  | 
 | #include "postgres.h" | 
 | #include <curl/curl.h> | 
 | #include <json-c/json.h> | 
 | #include "access/hd_work_mgr.h" | 
 | #include "access/pxfanalyze.h" | 
 | #include "catalog/namespace.h" | 
 | #include "catalog/pg_exttable.h" | 
 | #include "cdb/cdbanalyze.h" | 
 | #include "commands/analyzeutils.h" | 
 | #include "lib/stringinfo.h" | 
 | #include "nodes/makefuncs.h" | 
 | #include "utils/builtins.h" | 
 | #include "utils/elog.h" | 
 | #include "utils/guc.h" | 
 | #include "utils/lsyscache.h" | 
 |  | 
 |  | 
 | static void buildPxfTableCopy(Oid relationOid, | 
 | 		float4	samplingRatio, | 
 | 		int pxfStatMaxFragments, | 
 | 		const char* schemaName, const char* tableName, | 
 | 		const char* sampleSchemaName, const char* pxfSampleTable); | 
 | static void buildSampleFromPxf(const char* sampleSchemaName, | 
 | 		const char* sampleTableName, | 
 | 		const char* pxfSampleTable, | 
 | 		List *lAttributeNames, | 
 | 		float4 *sampleTableRelTuples); | 
 |  | 
 | static float4 calculateSamplingRatio(float4 relTuples, | 
 | 									 float4 relFrags, | 
 | 									 float4 requestedSampleSize); | 
 |  | 
 | static char* parseFormat(char fmtcode); | 
 | static char* escape_unprintables(const char *src); | 
 | static char* escape_fmtopts_string(const char *src); | 
 | static char* custom_fmtopts_string(const char *src); | 
 | static void printExtTable(Oid relationOid, ExtTableEntry* extTable); | 
 | static char* createPxfSampleStmt(Oid relationOid, | 
 | 		const char* schemaName, const char* tableName, | 
 | 		const char* sampleSchemaName, const char* pxfSampleTable, | 
 | 		float4 pxf_sample_ratio, int pxf_max_fragments); | 
 | static float4 getPxfFragmentTupleCount(Oid relationOid); | 
 | static float4 countFirstFragmentTuples(const char* schemaName, | 
 | 									   const char* tableName); | 
 | static void getFragmentStats(Relation rel, StringInfo location, | 
 | 							 float4 *numfrags, float4 *firstfragsize, | 
 | 							 float4 *totalsize); | 
 |  | 
 |  | 
 | void analyzePxfEstimateReltuplesRelpages(Relation relation, | 
 | 		StringInfo location, | 
 | 		float4* estimatedRelTuples, | 
 | 		float4* estimatedRelPages) | 
 | { | 
 |  | 
 | 	float4 numFrags = 0.0; | 
 | 	float4 firstFragSize = 0.0; | 
 | 	float4 totalSize = 0.0; | 
 |  | 
 | 	float4 firstFragTuples = 0.0; | 
 | 	float4 estimatedTuples = 0.0; | 
 |  | 
 | 	/* get number of fragments, size of first fragment and total size. | 
 | 	 * This is used together with the number of tuples in first fragment | 
 | 	 * to estimate the number of tuples in the table. */ | 
 | 	getFragmentStats(relation, location, &numFrags, &firstFragSize, &totalSize); | 
 |  | 
 | 	/* get number of tuples from first fragment */ | 
 | 	firstFragTuples = getPxfFragmentTupleCount(relation->rd_id); | 
 |  | 
 | 	/* calculate estimated tuple count */ | 
 | 	if (firstFragTuples > 0) | 
 | 	{ | 
 | 		Assert(firstFragSize > 0); | 
 | 		Assert(totalSize > 0); | 
 | 		/* The calculation: | 
 | 		 * size of each tuple = first fragment size / first fragment row | 
 | 		 * total size = size of each tuple * number of tuples | 
 | 		 * number of tuples = total size / size of each tuple | 
 | 		 */ | 
 | 		estimatedTuples = (totalSize / firstFragSize) * firstFragTuples; | 
 | 	} | 
 |  | 
 | 	elog(DEBUG2, "Estimated tuples for PXF table: %f. (first fragment count %f, fragments number %f, old estimate %f)", | 
 | 		 estimatedTuples, firstFragTuples, numFrags, *estimatedRelTuples); | 
 |  | 
 | 	*estimatedRelTuples = estimatedTuples; | 
 | 	*estimatedRelPages = numFrags; | 
 |  | 
 | 	/* relpages can't be 0 if there are tuples in the table. */ | 
 | 	if ((*estimatedRelPages < 1.0) && (estimatedTuples > 0)) | 
 | 	{ | 
 | 		*estimatedRelPages = 1.0; | 
 | 	} | 
 |  | 
 | 	/* in case there were problems with the PXF service, keep the defaults */ | 
 | 	if (*estimatedRelPages < 0) | 
 | 	{ | 
 | 		*estimatedRelPages =  gp_external_table_default_number_of_pages; | 
 | 	} | 
 | 	if (*estimatedRelTuples < 0) | 
 | 	{ | 
 | 		*estimatedRelTuples =  gp_external_table_default_number_of_tuples; | 
 | 	} | 
 | } | 
 |  | 
 | /* | 
 |  * Creates a sample table with data from a PXF table. | 
 |  * We need to create a copy of the PXF table, in order to pass the sampling | 
 |  * parameters pxf_sample_ratio and pxf_max_fragments as attributes, | 
 |  * and to create a segment reject limit of 25 percent. | 
 |  * | 
 |  * The new PXF table is sampled and the results are saved in the returned sample table. | 
 |  * Note that ANALYZE can be executed only by the database owner. | 
 |  * It is safe to assume that the database owner has permissions to create temp tables. | 
 |  * The sampling is done by uniformly sampling pxf_sample_ratio records of each fragments, | 
 |  * up to pxf_max_fragments. | 
 |  * | 
 |  * Input: | 
 |  * 	relationOid 	- relation to be sampled | 
 |  * 	sampleTableName - sample table name, moderately unique | 
 |  * 	lAttributeNames - attributes to be included in the sample | 
 |  * 	relTuples		- estimated size of relation | 
 |  * 	relFrags		- estimated number of fragments in relation | 
 |  * 	requestedSampleSize - as determined by attribute statistics requirements. | 
 |  * 	sampleTableRelTuples	- limit on size of the sample. | 
 |  * Output: | 
 |  * 	sampleTableRelTuples - number of tuples in the sample table created. | 
 |  */ | 
 | Oid buildPxfSampleTable(Oid relationOid, | 
 | 		char* sampleTableName, | 
 | 		List *lAttributeNames, | 
 | 		float4	relTuples, | 
 | 		float4  relFrags, | 
 | 		float4 	requestedSampleSize, | 
 | 		float4 *sampleTableRelTuples) | 
 | { | 
 | 	const char *schemaName = get_namespace_name(get_rel_namespace(relationOid)); /* must be pfreed */ | 
 | 	const char *tableName = get_rel_name(relationOid); /* must be pfreed */ | 
 | 	char	*sampleSchemaName = pstrdup("pg_temp"); | 
 | 	char	*pxfSampleTable = temporarySampleTableName(relationOid, "pg_analyze_pxf"); /* must be pfreed */ | 
 | 	Oid			sampleTableOid = InvalidOid; | 
 | 	Oid			pxfSampleTableOid = InvalidOid; | 
 | 	RangeVar 	*rangeVar = NULL; | 
 | 	float4 pxfSamplingRatio = 0.0; | 
 |  | 
 | 	Assert(requestedSampleSize > 0.0); | 
 | 	Assert(relTuples > 0.0); | 
 | 	Assert(relFrags > 0.0); | 
 |  | 
 | 	/* calculate pxf_sample_ratio */ | 
 | 	pxfSamplingRatio = calculateSamplingRatio(relTuples, relFrags, requestedSampleSize); | 
 |  | 
 | 	/* build copy of original pxf table */ | 
 | 	buildPxfTableCopy(relationOid, | 
 | 					  pxfSamplingRatio, | 
 | 					  pxf_stat_max_fragments, | 
 | 					  schemaName, tableName, | 
 | 					  sampleSchemaName, pxfSampleTable); | 
 |  | 
 | 	rangeVar = makeRangeVar(NULL /*catalogname*/, sampleSchemaName, pxfSampleTable, -1); | 
 | 	pxfSampleTableOid = RangeVarGetRelid(rangeVar, true /* failOK */, false /*allowHcatalog*/); | 
 |  | 
 | 	buildSampleFromPxf(sampleSchemaName, sampleTableName, pxfSampleTable, | 
 | 					   lAttributeNames, sampleTableRelTuples); | 
 |  | 
 | 	rangeVar = makeRangeVar(NULL /*catalogname*/, sampleSchemaName, sampleTableName, -1); | 
 | 	sampleTableOid = RangeVarGetRelid(rangeVar, true /* failOK */, false /*allowHcatalog*/); | 
 |  | 
 | 	Assert(sampleTableOid != InvalidOid); | 
 |  | 
 | 	/** | 
 | 	 * MPP-10723: Very rarely, we may be unlucky and generate an empty sample table. We error out in this case rather than | 
 | 	 * generate bad statistics. | 
 | 	 */ | 
 |  | 
 | 	if (*sampleTableRelTuples < 1.0) | 
 | 	{ | 
 | 		elog(ERROR, "ANALYZE unable to generate accurate statistics on table %s.%s. Try lowering gp_analyze_relative_error", | 
 | 				quote_identifier(schemaName), | 
 | 				quote_identifier(tableName)); | 
 | 	} | 
 |  | 
 | 	if (pxfSampleTableOid != InvalidOid) | 
 | 	{ | 
 | 		elog(DEBUG2, "ANALYZE dropping PXF sample table"); | 
 | 		dropSampleTable(pxfSampleTableOid, true); | 
 | 	} | 
 |  | 
 | 	pfree((void *) rangeVar); | 
 | 	pfree((void *) pxfSampleTable); | 
 | 	pfree((void *) tableName); | 
 | 	pfree((void *) schemaName); | 
 | 	pfree((void *) sampleSchemaName); | 
 | 	return sampleTableOid; | 
 | } | 
 |  | 
 | /* | 
 |  * Creates an external PXF table, with the same properties | 
 |  * as the given PXF table to be sampled, other than additional | 
 |  * 2 attributes in the location clause - | 
 |  * pxf_stats_sample_ratio and pxf_stats_max_fragments, | 
 |  * and a segment reject limit of 25 percent. | 
 |  */ | 
 | static void buildPxfTableCopy(Oid relationOid, | 
 | 		float4 samplingRatio, | 
 | 		int pxfStatMaxFragments, | 
 | 		const char* schemaName, const char* tableName, | 
 | 		const char* sampleSchemaName, const char* pxfSampleTable) | 
 | { | 
 |  | 
 | 	/* create table string */ | 
 | 	char* createPxfSampleStr = createPxfSampleStmt(relationOid, | 
 | 			schemaName, tableName, | 
 | 			sampleSchemaName, pxfSampleTable, | 
 | 			samplingRatio, pxfStatMaxFragments); | 
 |  | 
 | 	spiExecuteWithCallback(createPxfSampleStr, false /*readonly*/, 0 /*tcount */, | 
 | 			NULL, NULL); | 
 |  | 
 | 	pfree(createPxfSampleStr); | 
 |  | 
 | 	elog(DEBUG2, "Created PXF table %s.%s for sampling PXF table %s.%s", | 
 | 			quote_identifier(sampleSchemaName), | 
 | 			quote_identifier(pxfSampleTable), | 
 | 			quote_identifier(schemaName), | 
 | 			quote_identifier(tableName)); | 
 | } | 
 |  | 
 | /* | 
 |  * Creates and populates a sample table for a PXF table. | 
 |  * The actual queried table is not the original PXF table but a copy of it | 
 |  * with additional attributes to enable sampling. | 
 |  * | 
 |  * The results are stored in sampleTableRelTuples. | 
 |  */ | 
 | static void buildSampleFromPxf(const char* sampleSchemaName, | 
 | 		const char* sampleTableName, | 
 | 		const char* pxfSampleTable, | 
 | 		List *lAttributeNames, | 
 | 		float4 *sampleTableRelTuples) | 
 | { | 
 | 	int nAttributes = -1; | 
 | 	int i = 0; | 
 | 	ListCell *le = NULL; | 
 | 	StringInfoData str; | 
 |  | 
 | 	initStringInfo(&str); | 
 |  | 
 | 	appendStringInfo(&str, "create table %s.%s as (select ", | 
 | 			quote_identifier(sampleSchemaName), quote_identifier(sampleTableName)); | 
 |  | 
 | 	nAttributes = list_length(lAttributeNames); | 
 |  | 
 | 	foreach_with_count(le, lAttributeNames, i) | 
 | 	{ | 
 | 		appendStringInfo(&str, "Ta.%s", quote_identifier((const char *) lfirst(le))); | 
 | 		if (i < nAttributes - 1) | 
 | 		{ | 
 | 			appendStringInfo(&str, ", "); | 
 | 		} | 
 | 		else | 
 | 		{ | 
 | 			appendStringInfo(&str, " "); | 
 | 		} | 
 | 	} | 
 |  | 
 | 	appendStringInfo(&str, "from %s.%s as Ta) distributed randomly", | 
 | 			quote_identifier(sampleSchemaName), | 
 | 			quote_identifier(pxfSampleTable)); | 
 |  | 
 | 	/* in case of PXF error, analyze on this table will reverted */ | 
 | 	spiExecuteWithCallback(str.data, false /*readonly*/, 0 /*tcount */, | 
 | 			spiCallback_getProcessedAsFloat4, sampleTableRelTuples); | 
 |  | 
 | 	pfree(str.data); | 
 |  | 
 | 	elog(DEBUG2, "Created sample table %s.%s with nrows=%.0f", | 
 | 			quote_identifier(sampleSchemaName), | 
 | 			quote_identifier(sampleTableName), | 
 | 			*sampleTableRelTuples); | 
 | } | 
 |  | 
 | /* | 
 |  * Returns a sampling ratio - a fraction between 1.0 and 0.0001 | 
 |  * representing how many samples should be returned from each fragment | 
 |  * of a PXF table. | 
 |  * The ratio is calculated based on the tuples estimate of the table | 
 |  * and on the number of the actually sampled fragments | 
 |  * (GUC pxf_stat_max_fragments), by the following formula: | 
 |  * ratio = (<sample size> / <tuples estimate>) * (<total # fragments> / <fragments to be sampled>) | 
 |  * If the ratio is too big or small, it is corrected to 1.0 or 0.0001 respectively. | 
 |  * | 
 |  * Input: | 
 |  * 	relTuples		- number of tuples in the table | 
 |  * 	relFrags		- number of fragments in the table | 
 |  * 	requestedSampleSize - number of sample tuples required | 
 |  * Output: | 
 |  * 	the sampling ratio for the table. | 
 |  */ | 
 | static float4 calculateSamplingRatio(float4 relTuples, | 
 | 		 float4 relFrags, | 
 | 		 float4 requestedSampleSize) | 
 | { | 
 | 	float4 sampleRatio = 0.0; | 
 |  | 
 | 	Assert(relFrags > 0); | 
 | 	Assert(relTuples > 0); | 
 | 	Assert(requestedSampleSize > 0); | 
 |  | 
 | 	/* sample ratio for regular tables */ | 
 | 	sampleRatio = requestedSampleSize / relTuples; | 
 |  | 
 | 	if (pxf_stat_max_fragments < relFrags) | 
 | 	{ | 
 | 		/* | 
 | 		 * Correct ratio according to the number of sampled fragments. | 
 | 		 * If there are less fragments to sample, the ratio should be increased. | 
 | 		 * If the corrected sampling ratio is > 100%, make it 100% | 
 | 		 */ | 
 | 		sampleRatio = sampleRatio * (relFrags / pxf_stat_max_fragments); | 
 | 		if (sampleRatio > 1.0) | 
 | 		{ | 
 | 			sampleRatio = 1.0; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	/* | 
 | 	 * If the ratio is too low (< 0.0001), correct it to 0.0001. | 
 | 	 * That means that the lowest rate we will get is 1 tuple per 10,000. | 
 | 	 */ | 
 | 	if (sampleRatio < 0.0001) | 
 | 	{ | 
 | 		sampleRatio = 0.0001; | 
 | 	} | 
 |  | 
 | 	elog(DEBUG2, "PXF ANALYZE: pxf_stats_sample_ratio = %f, pxf_stats_max_fragments = %d, table fragments = %f", | 
 | 		 sampleRatio, pxf_stat_max_fragments, relFrags); | 
 | 	return sampleRatio; | 
 | } | 
 |  | 
 | static char* parseFormat(char fmtcode) | 
 | { | 
 | 	if (fmttype_is_custom(fmtcode)) | 
 | 		return "CUSTOM"; | 
 | 	if (fmttype_is_text(fmtcode)) | 
 | 		return "TEXT"; | 
 | 	if (fmttype_is_csv(fmtcode)) | 
 | 		return "CSV"; | 
 |  | 
 | 	elog(ERROR, "Unrecognized external table format '%c'", fmtcode); | 
 | 	return NULL; | 
 | } | 
 |  | 
 | /* Helper functions from dumputils.c, modified to backend (malloc->palloc) */ | 
 |  | 
 | /* | 
 |  * Escape any unprintables (0x00 - 0x1F) in given string | 
 |  */ | 
 | char * | 
 | escape_unprintables(const char *src) | 
 | { | 
 | 	int			len = strlen(src), | 
 | 				i, | 
 | 				j; | 
 | 	char	   *result = palloc0(len * 4 + 1); | 
 | 	if (!result) | 
 | 		return NULL; /* out of memory */ | 
 |  | 
 | 	for (i = 0, j = 0; i < len; i++) | 
 | 	{ | 
 | 		if ((src[i] <= '\x1F') && (src[i] != '\x09' /* TAB */)) | 
 | 		{ | 
 | 			snprintf(&(result[j]), 5, "\\x%02X", src[i]); | 
 | 			j += 4; | 
 | 		} | 
 | 		else | 
 | 			result[j++] = src[i]; | 
 | 	} | 
 | 	result[j] = '\0'; | 
 | 	return result; | 
 | } | 
 |  | 
 | /* | 
 |  * Escape backslashes and apostrophes in EXTERNAL TABLE format strings. | 
 |  * | 
 |  * The fmtopts field of a pg_exttable tuple has an odd encoding -- it is | 
 |  * partially parsed and contains "string" values that aren't legal SQL. | 
 |  * Each string value is delimited by apostrophes and is usually, but not | 
 |  * always, a single character.	The fmtopts field is typically something | 
 |  * like {delimiter '\x09' null '\N' escape '\'} or | 
 |  * {delimiter ',' null '' escape '\' quote '''}.  Each backslash and | 
 |  * apostrophe in a string must be escaped and each string must be | 
 |  * prepended with an 'E' denoting an "escape syntax" string. | 
 |  * | 
 |  * Usage note: A field value containing an apostrophe followed by a space | 
 |  * will throw this algorithm off -- it presumes no embedded spaces. | 
 |  */ | 
 | static char* escape_fmtopts_string(const char *src) | 
 | { | 
 | 	int			len = strlen(src); | 
 | 	int			i; | 
 | 	int			j; | 
 | 	char	   *result = palloc0(len * 2 + 1); | 
 | 	bool		inString = false; | 
 |  | 
 | 	for (i = 0, j = 0; i < len; i++) | 
 | 	{ | 
 | 		switch (src[i]) | 
 | 		{ | 
 | 			case '\'': | 
 | 				if (inString) | 
 | 				{ | 
 | 					/* | 
 | 					 * Escape apostrophes *within* the string. If the | 
 | 					 * apostrophe is at the end of the source string or is | 
 | 					 * followed by a space, it is presumed to be a closing | 
 | 					 * apostrophe and is not escaped. | 
 | 					 */ | 
 | 					if ((i + 1) == len || src[i + 1] == ' ') | 
 | 						inString = false; | 
 | 					else | 
 | 						result[j++] = '\\'; | 
 | 				} | 
 | 				else | 
 | 				{ | 
 | 					result[j++] = 'E'; | 
 | 					inString = true; | 
 | 				} | 
 | 				break; | 
 | 			case '\\': | 
 | 				result[j++] = '\\'; | 
 | 				break; | 
 | 		} | 
 |  | 
 | 		result[j++] = src[i]; | 
 | 	} | 
 |  | 
 | 	result[j] = '\0'; | 
 | 	return result; | 
 | } | 
 |  | 
 | /* | 
 |  * Tokenize a fmtopts string (for use with 'custom' formatters) | 
 |  * i.e. convert it to: a = b, format. | 
 |  * (e.g.:  formatter E'fixedwidth_in null E' ' preserve_blanks E'on') | 
 |  */ | 
 | static char* custom_fmtopts_string(const char *src) | 
 | { | 
 | 		int			len = src ? strlen(src) : 0; | 
 | 		char	   *result = palloc0(len * 2 + 1); | 
 | 		char	   *srcdup = src ? pstrdup(src) : NULL; | 
 | 		char	   *srcdup_start = srcdup; | 
 | 		char       *find_res = NULL; | 
 | 		int        last = 0; | 
 |  | 
 | 		if(!src || !srcdup || !result) | 
 | 			return NULL; | 
 |  | 
 | 		while (srcdup) | 
 | 		{ | 
 | 			/* find first word (a) */ | 
 | 			find_res = strchr(srcdup, ' '); | 
 | 			if (!find_res) | 
 | 				break; | 
 | 			strncat(result, srcdup, (find_res - srcdup)); | 
 | 			/* skip space */ | 
 | 			srcdup = find_res + 1; | 
 | 			/* remove E if E' */ | 
 | 			if((strlen(srcdup) > 2) && (srcdup[0] == 'E') && (srcdup[1] == '\'')) | 
 | 				srcdup++; | 
 | 			/* add " = " */ | 
 | 			strncat(result, " = ", 3); | 
 | 			/* find second word (b) until second ' | 
 | 			   find \' combinations and ignore them */ | 
 | 			find_res = strchr(srcdup + 1, '\''); | 
 | 			while (find_res && (*(find_res - 1) == '\\') /* ignore \' */) | 
 | 			{ | 
 | 				find_res = strchr(find_res + 1, '\''); | 
 | 			} | 
 | 			if (!find_res) | 
 | 				break; | 
 | 			strncat(result, srcdup, (find_res - srcdup + 1)); | 
 | 			srcdup = find_res + 1; | 
 | 			/* skip space and add ',' */ | 
 | 			if (srcdup && srcdup[0] == ' ') | 
 | 			{ | 
 | 				srcdup++; | 
 | 				strncat(result, ",", 1); | 
 | 			} | 
 | 		} | 
 |  | 
 | 		/* fix string - remove trailing ',' or '=' */ | 
 | 		last = strlen(result)-1; | 
 | 		if(result[last] == ',' || result[last] == '=') | 
 | 			result[last]='\0'; | 
 |  | 
 | 		pfree(srcdup_start); | 
 | 		return result; | 
 | } | 
 |  | 
 | static void printExtTable(Oid relationOid, ExtTableEntry* extTable) | 
 | { | 
 |  | 
 | 	if (extTable == NULL) | 
 | 		return; | 
 |  | 
 | 	elog(DEBUG2, "extTable params: oid: %d command: %s, encoding: %d, " | 
 | 			"format: %c (%s), error table oid: %d, format options: %s, " | 
 | 			"is web: %d, is writable: %d, locations size: %d, " | 
 | 			"reject limit: %d, reject limit type: %c", | 
 | 			relationOid, | 
 | 			extTable->command ? extTable->command : "NULL", | 
 | 			extTable->encoding, | 
 | 			extTable->fmtcode, | 
 | 			parseFormat(extTable->fmtcode), | 
 | 			extTable->fmterrtbl, | 
 | 			extTable->fmtopts, | 
 | 			extTable->isweb, | 
 | 			extTable->iswritable, | 
 | 			list_length(extTable->locations), | 
 | 			extTable->rejectlimit, | 
 | 			extTable->rejectlimittype == -1 ? 'n' : extTable->rejectlimittype); | 
 | } | 
 |  | 
 | /* | 
 |  * This method returns an SQL command to create a PXF table | 
 |  * which is a copy of a given PXF table relationOid, with the following changes: | 
 |  * - PXF sample table name is pg_temp.pg_analyze_pxf_<relationOid> | 
 |  * - LOCATION part is appended 2 attributes - pxf_sample_ratio, pxf_max_fragments. | 
 |  * - in case of error table - SEGMENT REJECT LIMIT 25 PERCENT | 
 |  * | 
 |  * Input: | 
 |  * 	relationOid 		- relation to be copied | 
 |  * 	schemaName 			- schema name of original table | 
 |  * 	tableName			- table name of original table | 
 |  * 	sampleSchemaName	- schema name of new table | 
 |  * 	pxfSampleTable		- table name or new table | 
 |  * 	pxf_sample_ratio	- ratio of samplings to be done per fragment | 
 |  * 	pxf_max_fragments	- max number of fragments to be sampled | 
 |  * Output: | 
 |  * 	SQL statement string for creating the new table | 
 |  */ | 
 | static char* createPxfSampleStmt(Oid relationOid, | 
 | 		const char* schemaName, const char* tableName, | 
 | 		const char* sampleSchemaName, const char* pxfSampleTable, | 
 | 		float4 pxf_sample_ratio, int pxf_max_fragments) | 
 | { | 
 | 	ExtTableEntry *extTable = GetExtTableEntry(relationOid); | 
 | 	StringInfoData str; | 
 | 	initStringInfo(&str); | 
 | 	char* location = NULL; | 
 | 	char* tmpstring = NULL; | 
 | 	char* escapedfmt = NULL; | 
 | 	char* tabfmt = NULL; | 
 | 	char* customfmt = NULL; | 
 |  | 
 | 	printExtTable(relationOid, extTable); | 
 |  | 
 | 	location = escape_unprintables(((Value*)list_nth(extTable->locations, 0))->val.str /*pxfLocation*/); | 
 |  | 
 | 	appendStringInfo(&str, "CREATE EXTERNAL TABLE %s.%s (LIKE %s.%s) " | 
 | 			"LOCATION(E'%s&STATS-SAMPLE-RATIO=%.4f&STATS-MAX-FRAGMENTS=%d') ", | 
 | 			quote_identifier(sampleSchemaName), | 
 | 			quote_identifier(pxfSampleTable), | 
 | 			quote_identifier(schemaName), | 
 | 			quote_identifier(tableName), | 
 | 			location, | 
 | 			pxf_sample_ratio, | 
 | 			pxf_max_fragments); | 
 |  | 
 | 	pfree(location); | 
 |  | 
 | 	/* add FORMAT clause */ | 
 | 	escapedfmt = escape_fmtopts_string((const char *) extTable->fmtopts); | 
 | 	tmpstring = escape_unprintables((const char *) escapedfmt); | 
 | 	pfree(escapedfmt); | 
 | 	escapedfmt = NULL; | 
 |  | 
 | 	switch (extTable->fmtcode) | 
 | 	{ | 
 | 	case 't': | 
 | 		tabfmt = "text"; | 
 | 		break; | 
 | 	case 'b': | 
 | 		/* | 
 | 		 * b denotes that a custom format is used. | 
 | 		 * the fmtopts string should be formatted as: | 
 | 		 * a1 = 'val1',...,an = 'valn' | 
 | 		 * | 
 | 		 */ | 
 | 		tabfmt = "custom"; | 
 | 		customfmt = custom_fmtopts_string(tmpstring); | 
 | 		break; | 
 | 	default: | 
 | 		tabfmt = "csv"; | 
 | 	} | 
 | 	appendStringInfo(&str, "FORMAT '%s' (%s) ", | 
 | 			tabfmt, | 
 | 			customfmt ? customfmt : tmpstring); | 
 | 	pfree(tmpstring); | 
 | 	tmpstring = NULL; | 
 | 	if (customfmt) | 
 | 	{ | 
 | 		pfree(customfmt); | 
 | 		customfmt = NULL; | 
 | 	} | 
 | 	/* add ENCODING clause */ | 
 | 	appendStringInfo(&str, "ENCODING '%s' ", pg_encoding_to_char(extTable->encoding)); | 
 |  | 
 | 	/* add error control clause */ | 
 | 	if (extTable->rejectlimit != -1) | 
 | 	{ | 
 | 		appendStringInfo(&str, "%s", "SEGMENT REJECT LIMIT 25 PERCENT "); | 
 | 	} | 
 |  | 
 | 	elog(DEBUG2, "createPxfSampleStmt SQL statement: %s", str.data); | 
 |  | 
 | 	return str.data; | 
 | } | 
 |  | 
 | /* | 
 |  * Returns the number of tuples in the first fragment of given | 
 |  * PXF table. | 
 |  * This is done by creating a copy of the PXF table, with additional parameters | 
 |  * limiting the query to the first fragment only (pxf_max_fragments = 1, pxf_sample_ratio = 1.0), | 
 |  * and running a COUNT query on it. | 
 |  * The tuple count result is returned. | 
 |  * | 
 |  * Input: | 
 |  * 	relationOid 	- relation to be sampled | 
 |  */ | 
 | static float4 getPxfFragmentTupleCount(Oid relationOid) | 
 | { | 
 | 	const char *schemaName = get_namespace_name(get_rel_namespace(relationOid)); /* must be pfreed */ | 
 | 	const char *tableName = get_rel_name(relationOid); /* must be pfreed */ | 
 | 	char	*sampleSchemaName = pstrdup("pg_temp"); | 
 | 	char	*pxfEstimateTable = temporarySampleTableName(relationOid, "pg_analyze_pxf_est"); /* must be pfreed */ | 
 | 	Oid			pxfEstimateTableOid = InvalidOid; | 
 | 	RangeVar 	*rangeVar = NULL; | 
 | 	float4	ntuples = -1.0; | 
 |  | 
 | 	/* build copy of original pxf table */ | 
 | 	buildPxfTableCopy(relationOid, | 
 | 					  1.0, /* get all tuples */ | 
 | 					  1, /* query only first fragment */ | 
 | 					  schemaName, tableName, | 
 | 					  sampleSchemaName, pxfEstimateTable); | 
 |  | 
 | 	rangeVar = makeRangeVar(NULL /*catalogname*/, sampleSchemaName, pxfEstimateTable, -1); | 
 | 	pxfEstimateTableOid = RangeVarGetRelid(rangeVar, true /* failOK */, false /*allowHcatalog*/); | 
 |  | 
 | 	if (pxfEstimateTableOid == InvalidOid) | 
 | 	{ | 
 | 		elog(ERROR, "Unable to create a copy of PXF table %s.%s", schemaName, tableName); | 
 | 	} | 
 |  | 
 | 	/* run count query */ | 
 | 	ntuples = countFirstFragmentTuples(sampleSchemaName, pxfEstimateTable); | 
 |  | 
 | 	Assert(pxfEstimateTable != InvalidOid); | 
 |  | 
 | 	elog(DEBUG2, "ANALYZE dropping PXF estimate table %s.%s (%d)", | 
 | 		 sampleSchemaName, pxfEstimateTable, pxfEstimateTableOid); | 
 | 	dropSampleTable(pxfEstimateTableOid, true); | 
 |  | 
 | 	pfree((void *) rangeVar); | 
 | 	pfree((void *) pxfEstimateTable); | 
 | 	pfree((void *) tableName); | 
 | 	pfree((void *) schemaName); | 
 | 	pfree((void *) sampleSchemaName); | 
 |  | 
 | 	return ntuples; | 
 | } | 
 |  | 
 | static float4 countFirstFragmentTuples(const char* schemaName, | 
 | 									   const char* tableName) | 
 | { | 
 | 	float ntuples = -1.0; | 
 | 	StringInfoData str; | 
 |  | 
 | 	initStringInfo(&str); | 
 | 	appendStringInfo(&str, "select count(*)::float4 from %s.%s", | 
 | 			quote_identifier(schemaName), | 
 | 			quote_identifier(tableName)); | 
 |  | 
 | 	/* in case of PXF error, analyze on this table will be reverted */ | 
 | 	spiExecuteWithCallback(str.data, false /*readonly*/, 0 /*tcount */, | 
 | 						   spiCallback_getSingleResultRowColumnAsFloat4, &ntuples); | 
 |  | 
 | 	pfree(str.data); | 
 |  | 
 | 	elog(DEBUG3, "count() of first pxf fragment gives %f values.", ntuples); | 
 |  | 
 | 	return ntuples; | 
 | } | 
 |  | 
 | /* -------------------------------- | 
 |  *		getFragmentStats  - | 
 |  * | 
 |  *		Fetch number of fragments, size of first fragment and total size of datasource, | 
 |  *		for an external table which is PXF | 
 |  * -------------------------------- | 
 |  */ | 
 | static void getFragmentStats(Relation rel, StringInfo location, | 
 | 							 float4 *numfrags, float4 *firstfragsize, | 
 | 							 float4 *totalsize) | 
 | { | 
 |  | 
 | 	PxfFragmentStatsElem *elem = NULL; | 
 | 	elem = get_pxf_fragments_statistics(location->data, rel); | 
 |  | 
 | 	/* | 
 | 	 * if get_pxf_fragments_statistics returned NULL - probably a communication error, we | 
 | 	 * error out. | 
 | 	 */ | 
 | 	if (!elem) | 
 | 	{ | 
 | 		elog(ERROR, "No statistics were returned for relation %s", RelationGetRelationName(rel)); | 
 | 	} | 
 |  | 
 | 	*numfrags = elem->numFrags; | 
 | 	*firstfragsize = elem->firstFragSize; | 
 | 	*totalsize = elem->totalSize; | 
 | 	pfree(elem); | 
 |  | 
 | 	elog(DEBUG2, "ANALYZE estimate for PXF table %s: fragments %f, first frag size %f, " | 
 | 			"total size %f [max int %d]", | 
 | 			RelationGetRelationName(rel), *numfrags, *firstfragsize, *totalsize, INT_MAX); | 
 | } |