Fix complex MERGE causes crash (#897) (#961)

Fixed the complex MERGE crash, which was due to storing variables
in tuple positions that did not exist. This happened on MERGE
commands where there wasn't a RETURN clause.

Added logic to catch these and handle appropriately. Meaning,
discarding the variable results, instead of storing them.

Fixing this issue has highlighted the need to fix variable reuse
in the MERGE transform itself. This is because MERGE preprocesses
the path before handing it off to anything else.

Added regression tests.
diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out
index 9699794..ecfc0c1 100644
--- a/regress/expected/cypher_merge.out
+++ b/regress/expected/cypher_merge.out
@@ -901,6 +901,53 @@
  {"id": 2533274790395907, "label": "node", "properties": {"age": 23, "name": "Lisa", "gender": "Female"}}::vertex
 (2 rows)
 
+--
+-- Complex MERGE w/wo RETURN values
+--
+-- These should each create a path, if it doesn't already exist.
+-- TODO Until the issue with variable reuse of 'x' in MERGE is corrected,
+--      these commands will each create a new path.
+SELECT * FROM cypher('cypher_merge', $$ MERGE ()-[:B]->(x:C)-[:E]->(x:C)<-[f:F]-(y:I) $$) AS (x agtype);
+ x 
+---
+(0 rows)
+
+SELECT * FROM cypher('cypher_merge', $$ MERGE ()-[:B]->(x:C)-[:E]->(x:C)<-[f:F]-(y:I) RETURN x $$) AS (x agtype);
+                                x                                 
+------------------------------------------------------------------
+ {"id": 3096224743817220, "label": "C", "properties": {}}::vertex
+(1 row)
+
+SELECT * FROM cypher('cypher_merge', $$ MERGE p=()-[:B]->(x:C)-[:E]->(x:C)<-[f:F]-(y:I) $$) AS (p agtype);
+ p 
+---
+(0 rows)
+
+SELECT * FROM cypher('cypher_merge', $$ MERGE p=()-[:B]->(x:C)-[:E]->(x:C)<-[f:F]-(y:I) RETURN p $$) AS (p agtype);
+                                                                                                                                                                                                                                                                                                                             p                                                                                                                                                                                                                                                                                                                             
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ [{"id": 281474976710708, "label": "", "properties": {}}::vertex, {"id": 2814749767106564, "label": "B", "end_id": 3096224743817223, "start_id": 281474976710708, "properties": {}}::edge, {"id": 3096224743817223, "label": "C", "properties": {}}::vertex, {"id": 3377699720527876, "label": "E", "end_id": 3096224743817224, "start_id": 3096224743817223, "properties": {}}::edge, {"id": 3096224743817224, "label": "C", "properties": {}}::vertex, {"id": 3659174697238532, "label": "F", "end_id": 3096224743817224, "start_id": 3940649673949188, "properties": {}}::edge, {"id": 3940649673949188, "label": "I", "properties": {}}::vertex]::path
+(1 row)
+
+SELECT * FROM cypher('cypher_merge', $$ MERGE p=()-[:B]->(x:C)-[:E]->(x:C)<-[f:F]-(y:I) RETURN p $$) AS (p agtype);
+                                                                                                                                                                                                                                                                                                                             p                                                                                                                                                                                                                                                                                                                             
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ [{"id": 281474976710709, "label": "", "properties": {}}::vertex, {"id": 2814749767106565, "label": "B", "end_id": 3096224743817225, "start_id": 281474976710709, "properties": {}}::edge, {"id": 3096224743817225, "label": "C", "properties": {}}::vertex, {"id": 3377699720527877, "label": "E", "end_id": 3096224743817226, "start_id": 3096224743817225, "properties": {}}::edge, {"id": 3096224743817226, "label": "C", "properties": {}}::vertex, {"id": 3659174697238533, "label": "F", "end_id": 3096224743817226, "start_id": 3940649673949189, "properties": {}}::edge, {"id": 3940649673949189, "label": "I", "properties": {}}::vertex]::path
+(1 row)
+
+-- TODO This should only return 1 row, as the path should already exist.
+--      However, we need to fix the variable reuse in MERGE. Until then,
+--      this will always return 5 rows due to 'x' above not being the same node.
+SELECT * FROM cypher('cypher_merge', $$ MATCH p=()-[:B]->(:C)-[:E]->(:C)<-[:F]-(:I) RETURN p $$) AS (p agtype);
+                                                                                                                                                                                                                                                                                                                             p                                                                                                                                                                                                                                                                                                                             
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ [{"id": 281474976710705, "label": "", "properties": {}}::vertex, {"id": 2814749767106561, "label": "B", "end_id": 3096224743817217, "start_id": 281474976710705, "properties": {}}::edge, {"id": 3096224743817217, "label": "C", "properties": {}}::vertex, {"id": 3377699720527873, "label": "E", "end_id": 3096224743817218, "start_id": 3096224743817217, "properties": {}}::edge, {"id": 3096224743817218, "label": "C", "properties": {}}::vertex, {"id": 3659174697238529, "label": "F", "end_id": 3096224743817218, "start_id": 3940649673949185, "properties": {}}::edge, {"id": 3940649673949185, "label": "I", "properties": {}}::vertex]::path
+ [{"id": 281474976710706, "label": "", "properties": {}}::vertex, {"id": 2814749767106562, "label": "B", "end_id": 3096224743817219, "start_id": 281474976710706, "properties": {}}::edge, {"id": 3096224743817219, "label": "C", "properties": {}}::vertex, {"id": 3377699720527874, "label": "E", "end_id": 3096224743817220, "start_id": 3096224743817219, "properties": {}}::edge, {"id": 3096224743817220, "label": "C", "properties": {}}::vertex, {"id": 3659174697238530, "label": "F", "end_id": 3096224743817220, "start_id": 3940649673949186, "properties": {}}::edge, {"id": 3940649673949186, "label": "I", "properties": {}}::vertex]::path
+ [{"id": 281474976710707, "label": "", "properties": {}}::vertex, {"id": 2814749767106563, "label": "B", "end_id": 3096224743817221, "start_id": 281474976710707, "properties": {}}::edge, {"id": 3096224743817221, "label": "C", "properties": {}}::vertex, {"id": 3377699720527875, "label": "E", "end_id": 3096224743817222, "start_id": 3096224743817221, "properties": {}}::edge, {"id": 3096224743817222, "label": "C", "properties": {}}::vertex, {"id": 3659174697238531, "label": "F", "end_id": 3096224743817222, "start_id": 3940649673949187, "properties": {}}::edge, {"id": 3940649673949187, "label": "I", "properties": {}}::vertex]::path
+ [{"id": 281474976710708, "label": "", "properties": {}}::vertex, {"id": 2814749767106564, "label": "B", "end_id": 3096224743817223, "start_id": 281474976710708, "properties": {}}::edge, {"id": 3096224743817223, "label": "C", "properties": {}}::vertex, {"id": 3377699720527876, "label": "E", "end_id": 3096224743817224, "start_id": 3096224743817223, "properties": {}}::edge, {"id": 3096224743817224, "label": "C", "properties": {}}::vertex, {"id": 3659174697238532, "label": "F", "end_id": 3096224743817224, "start_id": 3940649673949188, "properties": {}}::edge, {"id": 3940649673949188, "label": "I", "properties": {}}::vertex]::path
+ [{"id": 281474976710709, "label": "", "properties": {}}::vertex, {"id": 2814749767106565, "label": "B", "end_id": 3096224743817225, "start_id": 281474976710709, "properties": {}}::edge, {"id": 3096224743817225, "label": "C", "properties": {}}::vertex, {"id": 3377699720527877, "label": "E", "end_id": 3096224743817226, "start_id": 3096224743817225, "properties": {}}::edge, {"id": 3096224743817226, "label": "C", "properties": {}}::vertex, {"id": 3659174697238533, "label": "F", "end_id": 3096224743817226, "start_id": 3940649673949189, "properties": {}}::edge, {"id": 3940649673949189, "label": "I", "properties": {}}::vertex]::path
+(5 rows)
+
 --clean up
 SELECT * FROM cypher('cypher_merge', $$MATCH (n) DETACH DELETE n $$) AS (a agtype);
  a 
@@ -911,7 +958,7 @@
  * Clean up graph
  */
 SELECT drop_graph('cypher_merge', true);
-NOTICE:  drop cascades to 9 other objects
+NOTICE:  drop cascades to 14 other objects
 DETAIL:  drop cascades to table cypher_merge._ag_label_vertex
 drop cascades to table cypher_merge._ag_label_edge
 drop cascades to table cypher_merge.e
@@ -921,6 +968,11 @@
 drop cascades to table cypher_merge."City"
 drop cascades to table cypher_merge."BORN_IN"
 drop cascades to table cypher_merge.node
+drop cascades to table cypher_merge."B"
+drop cascades to table cypher_merge."C"
+drop cascades to table cypher_merge."E"
+drop cascades to table cypher_merge."F"
+drop cascades to table cypher_merge."I"
 NOTICE:  graph "cypher_merge" has been dropped
  drop_graph 
 ------------
diff --git a/regress/sql/cypher_merge.sql b/regress/sql/cypher_merge.sql
index 8c127f6..1fe3ed7 100644
--- a/regress/sql/cypher_merge.sql
+++ b/regress/sql/cypher_merge.sql
@@ -479,6 +479,22 @@
 SELECT * FROM cypher('cypher_merge', $$ MERGE (n:node {name: 'Jason'}) SET n.name = 'Lisa', n.age = 23, n.gender = 'Female' RETURN n $$) AS (n agtype);
 SELECT * FROM cypher('cypher_merge', $$ MATCH (n:node) RETURN n $$) AS (n agtype);
 
+--
+-- Complex MERGE w/wo RETURN values
+--
+-- These should each create a path, if it doesn't already exist.
+-- TODO Until the issue with variable reuse of 'x' in MERGE is corrected,
+--      these commands will each create a new path.
+SELECT * FROM cypher('cypher_merge', $$ MERGE ()-[:B]->(x:C)-[:E]->(x:C)<-[f:F]-(y:I) $$) AS (x agtype);
+SELECT * FROM cypher('cypher_merge', $$ MERGE ()-[:B]->(x:C)-[:E]->(x:C)<-[f:F]-(y:I) RETURN x $$) AS (x agtype);
+SELECT * FROM cypher('cypher_merge', $$ MERGE p=()-[:B]->(x:C)-[:E]->(x:C)<-[f:F]-(y:I) $$) AS (p agtype);
+SELECT * FROM cypher('cypher_merge', $$ MERGE p=()-[:B]->(x:C)-[:E]->(x:C)<-[f:F]-(y:I) RETURN p $$) AS (p agtype);
+SELECT * FROM cypher('cypher_merge', $$ MERGE p=()-[:B]->(x:C)-[:E]->(x:C)<-[f:F]-(y:I) RETURN p $$) AS (p agtype);
+-- TODO This should only return 1 row, as the path should already exist.
+--      However, we need to fix the variable reuse in MERGE. Until then,
+--      this will always return 5 rows due to 'x' above not being the same node.
+SELECT * FROM cypher('cypher_merge', $$ MATCH p=()-[:B]->(:C)-[:E]->(:C)<-[:F]-(:I) RETURN p $$) AS (p agtype);
+
 --clean up
 SELECT * FROM cypher('cypher_merge', $$MATCH (n) DETACH DELETE n $$) AS (a agtype);
 
diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c
index b0247d0..b42408b 100644
--- a/src/backend/executor/cypher_merge.c
+++ b/src/backend/executor/cypher_merge.c
@@ -230,11 +230,27 @@
         ExprContext *econtext = css->css.ss.ps.ps_ExprContext;
         TupleTableSlot *scantuple = econtext->ecxt_scantuple;
         Datum result;
+        int tuple_position = path->path_attr_num - 1;
+        bool debug_flag = false;
 
-        result = make_path(css->path_values);
+        /*
+         * We need to make sure that the tuple_position is within the
+         * boundaries of the tuple's number of attributes. Otherwise, it
+         * will corrupt memory. The cases where it doesn't fit within are
+         * usually due to a variable that is specified but there isn't a RETURN
+         * clause. In these cases we just don't bother to store the
+         * value.
+         */
+         if (!debug_flag &&
+             (tuple_position < scantuple->tts_tupleDescriptor->natts ||
+              scantuple->tts_tupleDescriptor->natts != 1))
+        {
+            result = make_path(css->path_values);
 
-        scantuple->tts_values[path->path_attr_num - 1] = result;
-        scantuple->tts_isnull[path->path_attr_num - 1] = false;
+            /* store the result */
+            scantuple->tts_values[tuple_position] = result;
+            scantuple->tts_isnull[tuple_position] = false;
+        }
     }
 }
 
@@ -739,25 +755,25 @@
              */
             if (CYPHER_TARGET_NODE_IS_VARIABLE(node->flags))
             {
-                bool debug = false;
+                bool debug_flag = false;
                 int tuple_position = node->tuple_position - 1;
 
                 /*
-                 * Generate an error message if the tuple position is
-                 * out-of-bounds and allow for debugging.
+                 * We need to make sure that the tuple_position is within the
+                 * boundaries of the tuple's number of attributes. Otherwise, it
+                 * will corrupt memory. The cases where it doesn't fall within
+                 * are usually due to a variable that is specified but there
+                 * isn't a RETURN clause. In these cases we just don't bother to
+                 * store the value.
                  */
-                if (!debug && (tuple_position >=
-                               scanTupleSlot->tts_tupleDescriptor->natts))
+                if (!debug_flag &&
+                    (tuple_position < scanTupleSlot->tts_tupleDescriptor->natts ||
+                     scanTupleSlot->tts_tupleDescriptor->natts != 1))
                 {
-                    ereport(ERROR,
-                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                             errmsg("merge_vertex: invalid tuple position")));
-
+                    /* store the result */
+                    scanTupleSlot->tts_values[tuple_position] = result;
+                    scanTupleSlot->tts_isnull[tuple_position] = false;
                 }
-
-                /* store the result */
-                scanTupleSlot->tts_values[tuple_position] = result;
-                scanTupleSlot->tts_isnull[tuple_position] = false;
             }
         }
     }
@@ -955,25 +971,25 @@
         if (CYPHER_TARGET_NODE_IS_VARIABLE(node->flags))
         {
             TupleTableSlot *scantuple = econtext->ecxt_scantuple;
-            bool debug = false;
+            bool debug_flag = false;
             int tuple_position = node->tuple_position - 1;
 
             /*
-             * Generate an error message if the tuple position is
-             * out-of-bounds and allow for debugging.
+             * We need to make sure that the tuple_position is within the
+             * boundaries of the tuple's number of attributes. Otherwise, it
+             * will corrupt memory. The cases where it doesn't fall within are
+             * usually due to a variable that is specified but there isn't a
+             * RETURN clause. In these cases we just don't bother to store the
+             * value.
              */
-            if (!debug && (tuple_position >=
-                           scantuple->tts_tupleDescriptor->natts))
+             if (!debug_flag &&
+                 (tuple_position < scantuple->tts_tupleDescriptor->natts ||
+                  scantuple->tts_tupleDescriptor->natts != 1))
             {
-                ereport(ERROR,
-                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                         errmsg("merge_edge: invalid tuple position")));
-
+                /* store the result */
+                scantuple->tts_values[tuple_position] = result;
+                scantuple->tts_isnull[tuple_position] = false;
             }
-
-            /* store the result */
-            scantuple->tts_values[tuple_position] = result;
-            scantuple->tts_isnull[tuple_position] = false;
         }
     }
 }