diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 41f7271..83371d0 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -61,7 +61,6 @@
 #include "cdb/cdbpathlocus.h"
 #include "cdb/memquota.h"
 #include "miscadmin.h"
-#include "cdb/cdbgang.h"
 #include "cdb/dispatcher.h"
 
 #ifdef USE_ORCA
@@ -69,26 +68,6 @@
 extern StringInfo OptVersion();
 #endif
 
-typedef struct ExplainState
-{
-	/* options */
-	bool		printTList;		/* print plan targetlists */
-	bool		printAnalyze;	/* print actual times */
-	/* other states */
-	PlannedStmt *pstmt;			/* top of plan */
-	List	   *rtable;			/* range table */
-
-    /* CDB */
-	int				segmentNum;
-    struct CdbExplain_ShowStatCtx  *showstatctx;    /* EXPLAIN ANALYZE info */
-    Slice          *currentSlice;   /* slice whose nodes we are visiting */
-    ErrorData      *deferredError;  /* caught error to be re-thrown */
-    MemoryContext   explaincxt;     /* mem pool for palloc()ing buffers etc. */
-    TupOutputState *tupOutputState; /* for sending output to client */
-	StringInfoData  outbuf;         /* the output buffer */
-    StringInfoData  workbuf;        /* a scratch buffer */
-} ExplainState;
-
 extern bool Test_print_direct_dispatch_info;
 
 static void ExplainOneQuery(Query *query, ExplainStmt *stmt,
@@ -109,7 +88,7 @@
 
 static double elapsed_time(instr_time *starttime);
 static ErrorData *explain_defer_error(ExplainState *es);
-static void explain_outNode(StringInfo str,
+void explain_outNode(StringInfo str,
 				Plan *plan, PlanState *planstate,
 				Plan *outer_plan,
 				int indent, ExplainState *es, bool isSequential);
@@ -977,7 +956,7 @@
  * side of a join with the current node.  This is only interesting for
  * deciphering runtime keys of an inner indexscan.
  */
-static void
+void
 explain_outNode(StringInfo str,
 				Plan *plan, PlanState *planstate,
 				Plan *outer_plan,
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index 9b6ae9f..5b71869 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -18,6 +18,7 @@
 #include "access/xact.h"
 #include "catalog/catquery.h"
 #include "catalog/heap.h"
+#include "commands/explain.h"
 #include "commands/trigger.h"
 #include "executor/spi_priv.h"
 #include "utils/lsyscache.h"
@@ -503,6 +504,10 @@
 SPI_execute_plan(SPIPlanPtr plan, Datum *Values, const char *Nulls,
 				 bool read_only, long tcount)
 {
+	clock_t start, finish;
+	double  duration;
+	start = clock();
+
 	int			res;
 
 	if (plan == NULL || tcount < 0)
@@ -530,6 +535,10 @@
 	PG_END_TRY();
 
 	_SPI_end_call(true);
+	finish = clock();
+	duration = (double)(finish - start) / CLOCKS_PER_SEC * 1000;
+	if (Debug_udf_plan)
+		elog(LOG, "query:%s; SPI_execute_plan time:%fms", plan->query, duration);
 	return res;
 }
 
@@ -592,6 +601,10 @@
 SPIPlanPtr
 SPI_prepare(const char *src, int nargs, Oid *argtypes)
 {
+	clock_t start, finish;
+	double  duration;
+	start = clock();
+
 	_SPI_plan	plan;
 	_SPI_plan  *result;
 
@@ -636,6 +649,11 @@
 
 	_SPI_end_call(true);
 
+	finish = clock();
+	duration = (double)(finish - start) / CLOCKS_PER_SEC * 1000;
+	if (Debug_udf_plan)
+		elog(LOG, "query:%s; SPI_prepare time:%fms", src, duration);
+
 	return result;
 }
 
@@ -680,6 +698,107 @@
 	return 0;
 }
 
+void Explain_udf_plan(QueryDesc *qdesc)
+{
+	ExplainState explainState;
+	ExplainState *es = &explainState;
+	MemoryContext oldcxt = CurrentMemoryContext;
+	MemoryContext explaincxt;
+	/* Create EXPLAIN memory context. */
+	explaincxt = AllocSetContextCreate(
+			CurrentMemoryContext, "EXPLAIN working storage",
+			ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE,
+			ALLOCSET_DEFAULT_MAXSIZE);
+
+	/* Initialize ExplainState structure. */
+	memset(es, 0, sizeof(*es));
+	es->explaincxt = explaincxt;
+	es->showstatctx = NULL;
+	es->deferredError = NULL;
+	es->pstmt = qdesc->plannedstmt;
+	es->segmentNum = qdesc->plannedstmt->planner_segments;
+
+	/* Allocate output buffer and a scratch buffer. */
+	MemoryContextSwitchTo(explaincxt);
+	initStringInfoOfSize(&es->workbuf, 1000);
+	initStringInfoOfSize(&es->outbuf, 16000);
+	MemoryContextSwitchTo(oldcxt);
+
+	StringInfo  buf = &es->outbuf;
+	es->currentSlice = getCurrentSlice(
+			qdesc->estate, LocallyExecutingSliceIndex(qdesc->estate));
+	es->rtable = qdesc->plannedstmt->rtable;
+
+	bool isSequential = false;
+	int indent = 0;
+	if (qdesc->plannedstmt->planTree->dispatch == DISPATCH_SEQUENTIAL) {
+		isSequential=true;
+	}
+
+	CmdType cmd = qdesc->plannedstmt->commandType;
+	Plan *childPlan = qdesc->plannedstmt->planTree;
+
+	if ((cmd == CMD_DELETE || cmd == CMD_INSERT || cmd == CMD_UPDATE) &&
+		qdesc->plannedstmt->planGen == PLANGEN_PLANNER)
+	{
+		/* Set sliceNum to the slice number of the outer-most query plan node */
+		int sliceNum = 0;
+		int numSegments = es->segmentNum;
+		char *cmdName = NULL;
+
+		switch (cmd)
+		{
+			case CMD_DELETE:
+				cmdName = "Delete";
+				break;
+			case CMD_INSERT:
+				cmdName = "Insert";
+				break;
+			case CMD_UPDATE:
+				cmdName = "Update";
+				break;
+			default:
+				/* This should never be reached */
+				Assert(!"Unexpected statement type");
+				break;
+		}
+		appendStringInfo(buf, "%s", cmdName);
+
+		if (IsA(childPlan, Motion))
+		{
+			Motion *pMotion = (Motion *)childPlan;
+			if (pMotion->motionType == MOTIONTYPE_FIXED && pMotion->numOutputSegs != 0)
+			{
+				numSegments = 1;
+			}
+			/* else: other motion nodes execute on all segments */
+		}
+		else if ((childPlan->directDispatch).isDirectDispatch)
+		{
+			numSegments = 1;
+		}
+		appendStringInfo(buf, " (slice%d; segments: %d)", sliceNum, numSegments);
+		appendStringInfo(buf, "  (rows=%.0f width=%d)\n", ceil(childPlan->plan_rows / numSegments), childPlan->plan_width);
+		appendStringInfo(buf, "  ->  ");
+		indent = 3;
+	}
+	explain_outNode(buf, childPlan, qdesc->planstate,
+	                NULL, indent, es, isSequential);
+
+	if (qdesc->estate->dispatch_data)
+		dispatcher_print_statistics(buf, qdesc->estate->dispatch_data);
+	appendStringInfo(buf, "Data locality statistics:\n");
+	if (qdesc->plannedstmt->datalocalityInfo ==NULL){
+		appendStringInfo(buf, "  no data locality information in this query\n");
+	} else {
+		appendStringInfo(buf, "  %s\n", qdesc->plannedstmt->datalocalityInfo->data);
+	}
+
+	elog(LOG, "UDF Plan:\n%s", buf->data);
+	// free memory
+	MemoryContextDelete(explaincxt);
+}
+
 HeapTuple
 SPI_copytuple(HeapTuple tuple)
 {
@@ -1852,6 +1971,14 @@
 				}
 
 				PlannedStmt *stmt = copyObject(originalStmt);
+				if (Debug_udf_plan && originalStmt->datalocalityInfo)
+				{
+					stmt->datalocalityTime = originalStmt->datalocalityTime;
+					stmt->datalocalityInfo = makeStringInfo();
+					stmt->datalocalityInfo->cursor = originalStmt->datalocalityInfo->cursor;
+					appendStringInfoString(stmt->datalocalityInfo,
+					                       originalStmt->datalocalityInfo->data);
+				}
 
 				_SPI_current->processed = 0;
 				_SPI_current->lastoid = InvalidOid;
@@ -2211,7 +2338,13 @@
 		ExecutorStart(queryDesc, 0);
 
 		ExecutorRun(queryDesc, ForwardScanDirection, tcount);
-		
+
+		// print udf plan to pglog
+		if (Debug_udf_plan)
+		{
+			Explain_udf_plan(queryDesc);
+		}
+
 		_SPI_current->processed = queryDesc->estate->es_processed;
 		_SPI_current->lastoid = queryDesc->estate->es_lastoid;
 		
@@ -2456,6 +2589,14 @@
 			if (IsA(node, PlannedStmt))
 			{
 				PlannedStmt *ps = (PlannedStmt*) node;
+				if (Debug_udf_plan && ((PlannedStmt*)lfirst(lc))->datalocalityInfo)
+				{
+					ps->datalocalityTime = ((PlannedStmt*)lfirst(lc))->datalocalityTime;
+					ps->datalocalityInfo = makeStringInfo();
+					ps->datalocalityInfo->cursor = ((PlannedStmt*)lfirst(lc))->datalocalityInfo->cursor;
+					appendStringInfoString(ps->datalocalityInfo,
+					                       ((PlannedStmt*)lfirst(lc))->datalocalityInfo->data);
+				}
 				ps->qdContext = plancxt;
 			}
 			newplan->ptlist = lappend(newplan->ptlist, node);
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index b90f1ec..8751f8a 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -279,6 +279,7 @@
 #endif
 bool		log_duration = false;
 bool		Debug_print_plan = false;
+bool		Debug_udf_plan = false;
 bool		Debug_print_parse = false;
 bool		Debug_print_rewritten = false;
 bool		Debug_pretty_print = false;
@@ -1446,6 +1447,14 @@
 		false, assign_log_stats, NULL
 	},
 	{
+		{"debug_udf_plan", PGC_USERSET, LOGGING_WHAT,
+			gettext_noop("Prints the execution plan of udf to server log."),
+			NULL
+		},
+		&Debug_udf_plan,
+		false, NULL, NULL
+	},
+	{
 		{"log_dispatch_stats", PGC_SUSET, STATS_MONITORING,
 			gettext_noop("Writes dispatcher performance statistics to the server log."),
 			NULL,
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index c00653d..1dba363 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -13,8 +13,28 @@
 #ifndef EXPLAIN_H
 #define EXPLAIN_H
 
+#include "cdb/cdbgang.h"
 #include "executor/executor.h"
 
+typedef struct ExplainState
+{
+	/* options */
+	bool		printTList;		/* print plan targetlists */
+	bool		printAnalyze;	/* print actual times */
+	/* other states */
+	PlannedStmt *pstmt;			/* top of plan */
+	List	   *rtable;			/* range table */
+
+    /* CDB */
+	int				segmentNum;
+    struct CdbExplain_ShowStatCtx  *showstatctx;    /* EXPLAIN ANALYZE info */
+    Slice          *currentSlice;   /* slice whose nodes we are visiting */
+    ErrorData      *deferredError;  /* caught error to be re-thrown */
+    MemoryContext   explaincxt;     /* mem pool for palloc()ing buffers etc. */
+    TupOutputState *tupOutputState; /* for sending output to client */
+	StringInfoData  outbuf;         /* the output buffer */
+    StringInfoData  workbuf;        /* a scratch buffer */
+} ExplainState;
 
 extern void ExplainQuery(ExplainStmt *stmt, const char *queryString,
 						 ParamListInfo params, DestReceiver *dest);
diff --git a/src/include/executor/spi.h b/src/include/executor/spi.h
index 7b4ba1b..74102c6 100644
--- a/src/include/executor/spi.h
+++ b/src/include/executor/spi.h
@@ -113,6 +113,8 @@
 extern SPIPlanPtr SPI_saveplan(SPIPlanPtr plan);
 extern int	SPI_freeplan(SPIPlanPtr plan);
 
+extern void Explain_udf_plan(QueryDesc *qdesc);
+
 extern Oid	SPI_getargtypeid(SPIPlanPtr plan, int argIndex);
 extern int	SPI_getargcount(SPIPlanPtr plan);
 extern bool SPI_is_cursor_plan(SPIPlanPtr plan);
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index d1698a7..e8c4713 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -133,6 +133,7 @@
 /* GUC vars that are actually declared in guc.c, rather than elsewhere */
 extern bool log_duration;
 extern bool Debug_print_plan;
+extern bool Debug_udf_plan;
 extern bool Debug_print_parse;
 extern bool Debug_print_rewritten;
 extern bool Debug_pretty_print;
diff --git a/src/test/regress/checkinc.py b/src/test/regress/checkinc.py
index 360e9c4..f229dc9 100755
--- a/src/test/regress/checkinc.py
+++ b/src/test/regress/checkinc.py
@@ -102,6 +102,7 @@
     'json-c/json.h':     [],
     'emmintrin.h':       [],
     'cdb/cdbhash.h':     [],
+    'cdb/cdbgang.h':     [],
 }
 
 
