Graph: Fix handling of bigint columns

JIRA: MADLIB-1444

Graph modules cast the vertex ids to integers in a few spots. This commit
fixes the issue by using the bigint type.

commit: 4daecd6c4b1aa7bf70844d99e8302587111cf2d0 [log] [tgz]
author: Orhan Kislal <okislal@apache.org> Tue Jul 21 18:04:35 2020 -0400
committer: Orhan Kislal <okislal@pivotal.io> Wed Aug 05 14:31:01 2020 -0400
tree: 1febcd79249f4926cf0fa06bcd5de5d78ace8dac
parent: 8adcf41993a67de0ee3048716ec8a0c4d3132417 [diff]
diff --git a/src/ports/postgres/modules/graph/apsp.sql_in b/src/ports/postgres/modules/graph/apsp.sql_in
index 66d1f74..893cd79 100644
--- a/src/ports/postgres/modules/graph/apsp.sql_in
+++ b/src/ports/postgres/modules/graph/apsp.sql_in

@@ -78,7 +78,7 @@
 
 <dt>vertex_id</dt>
 <dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids.  The vertex ids are of type BIGINT with no duplicates.
+vertex ids.  The vertex ids can be of type INTEGER or BIGINT with no duplicates.
 They do not need to be contiguous.</dd>
 
 <dt>edge_table</dt>
@@ -90,9 +90,9 @@
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
-  - src (BIGINT): Name of the column containing the source vertex ids in the
+  - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the
   edge table. Default column name is 'src'.
-  - dest (BIGINT): Name of the column containing the destination vertex ids
+  - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids
   in the edge table. Default column name is 'dest'.
   - weight (FLOAT8): Name of the column containing the edge weights in the
   edge table. Default column name is 'weight'.</dd>
@@ -142,10 +142,10 @@
 <dd>TEXT. Name of the table that contains the APSP output.</dd>
 
 <dt>source_vertex</dt>
-<dd>BIGINT. The vertex that will be the source of the desired path.</dd>
+<dd>INTEGER or BIGINT. The vertex that will be the source of the desired path.</dd>
 
 <dt>dest_vertex</dt>
-<dd>BIGINT. The vertex that will be the destination of the desired path.</dd>
+<dd>INTEGER or BIGINT. The vertex that will be the destination of the desired path.</dd>
 
 <dt>path_table</dt>
 <dd>TEXT. Name of the output table that contains the path.

diff --git a/src/ports/postgres/modules/graph/bfs.py_in b/src/ports/postgres/modules/graph/bfs.py_in
index e802aac..70bff3f 100644
--- a/src/ports/postgres/modules/graph/bfs.py_in
+++ b/src/ports/postgres/modules/graph/bfs.py_in

@@ -59,8 +59,8 @@
         """Graph BFS: Invalid value for directed ({0}), must be boolean.""".
         format(directed))
 
-    _assert(isinstance(source_vertex,int),
-        """Graph BFS: Source vertex {source_vertex} has to be an integer.""".
+    _assert(isinstance(source_vertex,int) or isinstance(source_vertex,long),
+        """Graph BFS: Source vertex {source_vertex} has to be an integer or bigint.""".
         format(**locals()))
     src_exists = plpy.execute("""
         SELECT * FROM {vertex_table} WHERE {vertex_id}={source_vertex}
@@ -217,9 +217,9 @@
                 vertex_id               TEXT,
                 edge_table              TEXT,
                 edge_args               TEXT,
-                source_vertex           INTEGER,
+                source_vertex           BIGINT,
                 out_table               TEXT,
-                max_distance            INTEGER,
+                max_distance            BIGINT,
                 directed                BOOLEAN,
                 grouping_cols           TEXT
             )

diff --git a/src/ports/postgres/modules/graph/bfs.sql_in b/src/ports/postgres/modules/graph/bfs.sql_in
index ac945a1..f9507d9 100644
--- a/src/ports/postgres/modules/graph/bfs.sql_in
+++ b/src/ports/postgres/modules/graph/bfs.sql_in

@@ -68,7 +68,7 @@
 
 <dt>vertex_id</dt>
 <dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids.  The vertex ids are of type INTEGER with no duplicates.
+vertex ids.  The vertex ids can be of type INTEGER or BIGINT with no duplicates.
 They do not need to be contiguous.</dd>
 
 <dt>edge_table</dt>
@@ -82,14 +82,14 @@
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
-  - src (INTEGER): Name of the column containing the source vertex ids in the edge table.
+  - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the edge table.
   Default column name is 'src'.
   (This is not to be confused with the 'source_vertex' argument passed to the BFS function.)
-  - dest (INTEGER): Name of the column containing the destination vertex ids in
+  - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids in
   the edge table. Default column name is 'dest'.
 
 <dt>source_vertex</dt>
-<dd>INTEGER. The source vertex id for the algorithm to start. This vertex id must
+<dd>INTEGER or BIGINT. The source vertex id for the algorithm to start. This vertex id must
 exist in the 'vertex_id' column of 'vertex_table'.</dd>
 
 <dt>out_table</dt>
@@ -401,7 +401,7 @@
     vertex_id               TEXT,
     edge_table              TEXT,
     edge_args               TEXT,
-    source_vertex           INT,
+    source_vertex           BIGINT,
     out_table               TEXT,
     max_distance            INT,
     directed                BOOLEAN,
@@ -418,7 +418,7 @@
     vertex_id               TEXT,
     edge_table              TEXT,
     edge_args               TEXT,
-    source_vertex           INT,
+    source_vertex           BIGINT,
     out_table               TEXT,
     max_distance            INT,
     directed                BOOLEAN
@@ -434,7 +434,7 @@
     vertex_id               TEXT,
     edge_table              TEXT,
     edge_args               TEXT,
-    source_vertex           INT,
+    source_vertex           BIGINT,
     out_table               TEXT,
     max_distance            INT
 ) RETURNS VOID AS $$
@@ -449,7 +449,7 @@
     vertex_id               TEXT,
     edge_table              TEXT,
     edge_args               TEXT,
-    source_vertex           INT,
+    source_vertex           BIGINT,
     out_table               TEXT
 ) RETURNS VOID AS $$
      SELECT MADLIB_SCHEMA.graph_bfs($1, $2, $3, $4, $5, $6, NULL, NULL, NULL);

diff --git a/src/ports/postgres/modules/graph/hits.py_in b/src/ports/postgres/modules/graph/hits.py_in
index 1283070..ad8e748 100644
--- a/src/ports/postgres/modules/graph/hits.py_in
+++ b/src/ports/postgres/modules/graph/hits.py_in

@@ -173,7 +173,7 @@
             plpy.execute("""
                     CREATE TABLE {out_table} (
                         {grouping_cols_for_create_table_comma}
-                        {vertex_id} INTEGER,
+                        {vertex_id} BIGINT,
                         authority DOUBLE PRECISION,
                         hub DOUBLE PRECISION
                     )

diff --git a/src/ports/postgres/modules/graph/hits.sql_in b/src/ports/postgres/modules/graph/hits.sql_in
index 83f838d..d2d6cfc 100644
--- a/src/ports/postgres/modules/graph/hits.sql_in
+++ b/src/ports/postgres/modules/graph/hits.sql_in

@@ -70,7 +70,7 @@
 
 <dt>vertex_id</dt>
 <dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-    vertex ids. The vertex ids are of type INTEGER with no duplicates. They
+    vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates. They
     do not need to be contiguous.</dd>
 
 <dt>edge_table</dt>
@@ -81,9 +81,9 @@
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
-  - src (INTEGER): Name of the column containing the source vertex ids in
+  - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in
                    the edge table. Default column name is 'src'.
-  - dest (INTEGER): Name of the column containing the destination vertex
+  - dest (INTEGER or BIGINT): Name of the column containing the destination vertex
                     ids in the edge table. Default column name is 'dest'.</dd>
 
 <dt>out_table</dt>

diff --git a/src/ports/postgres/modules/graph/measures.py_in b/src/ports/postgres/modules/graph/measures.py_in
index 3ec07e9..902c0d2 100644
--- a/src/ports/postgres/modules/graph/measures.py_in
+++ b/src/ports/postgres/modules/graph/measures.py_in

@@ -264,7 +264,7 @@
                 {grouping_cols_comma}
                 {e.weight} AS diameter,
                 {self._madlib}.matrix_agg(
-                        ARRAY[{e.src}, {e.dest}]::double precision[])::integer[]
+                        ARRAY[{e.src}, {e.dest}]::double precision[])::BIGINT[]
                     AS diameter_end_vertices
             FROM
                 {apsp_table} JOIN

diff --git a/src/ports/postgres/modules/graph/measures.sql_in b/src/ports/postgres/modules/graph/measures.sql_in
index 0879832..50afaf3 100644
--- a/src/ports/postgres/modules/graph/measures.sql_in
+++ b/src/ports/postgres/modules/graph/measures.sql_in

@@ -643,7 +643,7 @@
 
 <dt>vertex_id</dt>
 <dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids.  The vertex ids are of type INTEGER with no duplicates.
+vertex ids.  The vertex ids can be of type INTEGER or BIGINT with no duplicates.
 They do not need to be contiguous.</dd>
 
 <dt>edge_table</dt>
@@ -655,9 +655,9 @@
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
-  - src (INTEGER): Name of the column containing the source vertex ids in the
+  - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the
   edge table. Default column name is 'src'.
-  - dest (INTEGER): Name of the column containing the destination vertex ids
+  - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids
   in the edge table. Default column name is 'dest'.
   - weight (FLOAT8): Name of the column containing the edge weights in the
   edge table. Default column name is 'weight'.</dd>

diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in
index e732675..830432b 100644
--- a/src/ports/postgres/modules/graph/pagerank.py_in
+++ b/src/ports/postgres/modules/graph/pagerank.py_in

@@ -68,6 +68,7 @@
             "PageRank: Invalid damping factor value ({0}), must be between 0 and 1.".
             format(damping_factor))
 
+
     # Validate against the given set of nodes for Personalized Page Rank
     if personalization_vertices:
         grouping_cols = get_table_qualified_col_str(
@@ -76,6 +77,9 @@
             if grouping_cols_list else ''
         src = edge_params["src"]
         dest = edge_params["dest"]
+        input_personalization_vertices_length = len(personalization_vertices)
+
+        personalization_vertices_str = ','.join([str(i) for i in personalization_vertices])
 
         # Get a list which has the number of personalization nodes of each group
         vertices_count_list_by_group = plpy.execute("""
@@ -84,11 +88,10 @@
                 RIGHT JOIN {edge_table}
                 ON ({vertex_table}.{vertex_id} = {edge_table}.{src}
                 OR {vertex_table}.{vertex_id} = {edge_table}.{dest})
-                AND {vertex_table}.{vertex_id} = ANY(ARRAY{personalization_vertices})
+                AND {vertex_table}.{vertex_id} = ANY(ARRAY[{personalization_vertices_str}])
                 {group_by_clause}
             """.format(**locals()))
 
-        input_personalization_vertices_length = len(personalization_vertices)
 
         # The number of personalization nodes for every group should be equal to
         # the number given by input personalization_vertices list. Otherwise,
@@ -277,12 +280,15 @@
                 # factor for computing the random_prob
                 where_clause_ppr = ''
                 if personalization_vertices:
+                    personalization_vertices_str = ','.join(
+                        [str(i) for i in personalization_vertices])
                     where_clause_ppr = """
-                        where __vertices__ = ANY(ARRAY{personalization_vertices})
+                        where __vertices__ = ANY(ARRAY[{personalization_vertices_str}])
                     """.format(**locals())
                     random_prob_grp = 1.0 - damping_factor
                     init_prob_grp = 1.0 / total_ppr_nodes
                 else:
+                    personalization_vertices_str = ''
                     random_prob_grp = """
                             {rand_damp}/COUNT(__vertices__)::DOUBLE PRECISION
                         """.format(**locals())
@@ -315,7 +321,7 @@
                 if personalization_vertices:
                     init_prob_grp_ppr = 1.0 / total_ppr_nodes
                     init_pr = """
-                            CASE when __vertices__ = ANY(ARRAY{personalization_vertices})
+                            CASE when __vertices__ = ANY(ARRAY[{personalization_vertices_str}])
                             THEN {init_prob_grp_ppr} ELSE 0 END
                         """.format(**locals())
 
@@ -387,7 +393,7 @@
                 plpy.execute("""
                         CREATE TABLE {out_table} (
                             {grouping_cols_clause},
-                            {vertex_id} INTEGER,
+                            {vertex_id} BIGINT,
                             pagerank DOUBLE PRECISION
                         )
                     """.format(**locals()))
@@ -716,15 +722,18 @@
         ppr_init_value = 1.0 / total_ppr_nodes
         prob_value = 1.0 - damping_factor
         dest = edge_params["dest"]
+
+        personalization_vertices_str = ','.join([str(i) for i in personalization_vertices])
+
         # In case of PPR, Assign the Random jump probability to the personalization_vertices only.
         # For rest of the nodes, Random jump probability  will be zero.
         ppr_random_prob_clause = """
-                CASE WHEN {edge_temp_table}.{dest} = ANY(ARRAY{personalization_vertices})
+                CASE WHEN {edge_temp_table}.{dest} = ANY(ARRAY[{personalization_vertices_str}])
                 THEN {prob_value} ELSE 0 END
             """.format(**locals())
 
         ppr_init_prob_clause = """
-                CASE WHEN {vertex_id} = ANY(ARRAY{personalization_vertices})
+                CASE WHEN {vertex_id} = ANY(ARRAY[{personalization_vertices_str}])
                 THEN {ppr_init_value} ELSE 0 END
             """.format(**locals())
     return(total_ppr_nodes, ppr_random_prob_clause, ppr_init_prob_clause)
@@ -754,7 +763,7 @@
                                          -- N is number of vertices in the graph)
         grouping_col  TEXT,              -- Comma separated column names to group on
                                          -- (DEFAULT = NULL, no grouping)
-        personalization_vertices ARRAY OF INTEGER, -- A comma seperated list of vertices
+        personalization_vertices ARRAY OF BIGINT, -- A comma seperated list of vertices
                                                       or nodes for personalized page rank.
 """) + """
 

diff --git a/src/ports/postgres/modules/graph/pagerank.sql_in b/src/ports/postgres/modules/graph/pagerank.sql_in
index cd239bd..8338fb6 100644
--- a/src/ports/postgres/modules/graph/pagerank.sql_in
+++ b/src/ports/postgres/modules/graph/pagerank.sql_in

@@ -77,7 +77,7 @@
 
 <dt>vertex_id</dt>
 <dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids.  The vertex ids are of type INTEGER with no duplicates.
+vertex ids.  The vertex ids can be of type INTEGER or BIGINT with no duplicates.
 They do not need to be contiguous.</dd>
 
 <dt>edge_table</dt>
@@ -88,9 +88,9 @@
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
-  - src (INTEGER): Name of the column containing the source vertex ids in the edge table.
+  - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the edge table.
                    Default column name is 'src'.
-  - dest (INTEGER): Name of the column containing the destination vertex ids in the edge table.
+  - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids in the edge table.
                     Default column name is 'dest'.</dd>
 
 <dt>out_table</dt>
@@ -127,7 +127,7 @@
 @note Expressions are not currently supported for 'grouping_cols'.</dd>
 
 <dt> personalization_vertices (optional)</dt>
-<dd>INTEGER[], default: NULL. A comma separated list of vertices or nodes
+<dd>INTEGER[] or BIGINT[], default: NULL. A comma separated list of vertices or nodes
 for personalized PageRank. When this parameter is provided, personalized PageRank
 will run.  In the absence of this parameter, regular PageRank will run.
 </dl>
@@ -349,7 +349,7 @@
     max_iter        INTEGER,
     threshold       FLOAT8,
     grouping_cols   VARCHAR,
-    personalization_vertices INTEGER[]
+    personalization_vertices BIGINT[]
 ) RETURNS VOID AS $$
     PythonFunction(graph, pagerank, pagerank)
 $$ LANGUAGE plpythonu VOLATILE

diff --git a/src/ports/postgres/modules/graph/sssp.py_in b/src/ports/postgres/modules/graph/sssp.py_in
index 26ddf88..9e813b0 100644
--- a/src/ports/postgres/modules/graph/sssp.py_in
+++ b/src/ports/postgres/modules/graph/sssp.py_in

@@ -141,7 +141,7 @@
 
         w_type = get_expr_type(weight, edge_table).lower()
         init_w = INT_MAX
-        if w_type in ['real', 'double precision', 'float8']:
+        if w_type in ['real', 'double precision', 'float8', 'bigint']:
             init_w = INFINITY
 
         # We keep a table of every vertex, the minimum cost to that destination
@@ -162,7 +162,7 @@
             vertex_id               TEXT,
             edge_table              TEXT,
             edge_args               TEXT,
-            source_vertex           INTEGER,
+            source_vertex           BIGINT,
             out_table               TEXT,
             grouping_cols           TEXT)
             """.format(**locals()))

diff --git a/src/ports/postgres/modules/graph/sssp.sql_in b/src/ports/postgres/modules/graph/sssp.sql_in
index fd05fcf..195759d 100644
--- a/src/ports/postgres/modules/graph/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/sssp.sql_in

@@ -68,7 +68,7 @@
 
 <dt>vertex_id</dt>
 <dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids.  The vertex ids are of type BIGINT with no duplicates.
+vertex ids.  The vertex ids can be of type INTEGER or BIGINT with no duplicates.
 They do not need to be contiguous.</dd>
 
 <dt>edge_table</dt>
@@ -80,12 +80,12 @@
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
-  - src (BIGINT): Name of the column containing the source vertex ids in the edge table. Default column name is 'src'.
-  - dest (BIGINT): Name of the column containing the destination vertex ids in the edge table. Default column name is 'dest'.
+  - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the edge table. Default column name is 'src'.
+  - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids in the edge table. Default column name is 'dest'.
   - weight (FLOAT8): Name of the column containing the edge weights in the edge table. Default column name is 'weight'.</dd>
 
 <dt>source_vertex</dt>
-<dd>BIGINT. The source vertex id for the algorithm to start. This vertex id must
+<dd>INTEGER or BIGINT. The source vertex id for the algorithm to start. This vertex id must
 exist in the 'vertex_id' column of 'vertex_table'.</dd>
 
 <dt>out_table</dt>
@@ -125,7 +125,7 @@
 <dd>TEXT. Name of the table that contains the SSSP output.</dd>
 
 <dt>dest_vertex</dt>
-<dd>BIGINT. The vertex that will be the destination of the desired path.</dd>
+<dd>INTEGER or BIGINT. The vertex that will be the destination of the desired path.</dd>
 
 <dt>path_table</dt>
 <dd>TEXT. Name of the output table that contains the path.

diff --git a/src/ports/postgres/modules/graph/test/apsp.sql_in b/src/ports/postgres/modules/graph/test/apsp.sql_in
index 4f399cc..05cd00d 100644
--- a/src/ports/postgres/modules/graph/test/apsp.sql_in
+++ b/src/ports/postgres/modules/graph/test/apsp.sql_in

@@ -113,12 +113,12 @@
 ALTER TABLE vertex RENAME COLUMN "DEST" TO id;
 
 -- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src::bigint, "DEST"::bigint, weight FROM "EDGE";
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT (src+992147483647)::bigint as src, ("DEST"+992147483647)::bigint as dest, weight FROM "EDGE";
 
 DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary, pg_temp.out2_path;
-SELECT graph_apsp('v2',NULL,'e2','dest="DEST"','pg_temp.out2');
-SELECT graph_apsp_get_path('pg_temp.out2',0,7,'pg_temp.out2_path');
+SELECT graph_apsp('v2',NULL,'e2',NULL,'pg_temp.out2');
+SELECT graph_apsp_get_path('pg_temp.out2',992147483647,992147483652,'pg_temp.out2_path');
 
 -- Test for infinite paths
 DROP TABLE IF EXISTS out, out_summary, out_path;

diff --git a/src/ports/postgres/modules/graph/test/bfs.sql_in b/src/ports/postgres/modules/graph/test/bfs.sql_in
index 46f97fb..38f7aa1 100644
--- a/src/ports/postgres/modules/graph/test/bfs.sql_in
+++ b/src/ports/postgres/modules/graph/test/bfs.sql_in

@@ -291,11 +291,12 @@
 ALTER TABLE vertex RENAME COLUMN dest TO id;
 
 -- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT "SRC"::bigint, dest::bigint, weight FROM "EDGE";
+
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT ("SRC"+992147483647)::bigint as src, (dest+992147483647)::bigint as dest FROM "EDGE";
 
 DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
-SELECT graph_bfs('v2',NULL,'e2','src="SRC"',3,'pg_temp.out2');
+SELECT graph_bfs('v2',NULL,'e2',NULL,992147483650,'pg_temp.out2');
 SELECT count(*) from pg_temp.out2;
 SELECT * from pg_temp.out2_summary;
 

diff --git a/src/ports/postgres/modules/graph/test/hits.sql_in b/src/ports/postgres/modules/graph/test/hits.sql_in
index 2ef4d63..b15778a 100644
--- a/src/ports/postgres/modules/graph/test/hits.sql_in
+++ b/src/ports/postgres/modules/graph/test/hits.sql_in

@@ -173,8 +173,9 @@
 ALTER TABLE vertex RENAME COLUMN dest TO id;
 
 -- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src::bigint, dest::bigint FROM edge;
+
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT (src+992147483647)::bigint as src, (dest+992147483647)::bigint as dest FROM edge;
 
 DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
 SELECT hits('v2',NULL,'e2',NULL,'pg_temp.out2');

diff --git a/src/ports/postgres/modules/graph/test/measures.sql_in b/src/ports/postgres/modules/graph/test/measures.sql_in
index d3d0081..14dbab4 100644
--- a/src/ports/postgres/modules/graph/test/measures.sql_in
+++ b/src/ports/postgres/modules/graph/test/measures.sql_in

@@ -198,8 +198,10 @@
 
 -- Test for bigint columns
 
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src_id::bigint, "DEST_ID"::bigint, edge_weight FROM "EDGE";
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS
+SELECT (src_id+992147483647)::bigint as src_id, ("DEST_ID"+992147483647)::bigint as "DEST_ID", edge_weight
+FROM "EDGE";
 
 DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
 SELECT graph_apsp('v2',      -- Vertex table

diff --git a/src/ports/postgres/modules/graph/test/pagerank.sql_in b/src/ports/postgres/modules/graph/test/pagerank.sql_in
index 22794e9..870ed00 100644
--- a/src/ports/postgres/modules/graph/test/pagerank.sql_in
+++ b/src/ports/postgres/modules/graph/test/pagerank.sql_in

@@ -209,8 +209,9 @@
 ALTER TABLE vertex RENAME COLUMN dest TO id;
 
 -- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src::bigint, dest::bigint FROM "EDGE";
+
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT (src+992147483647)::bigint as src, (dest+992147483647)::bigint as dest FROM "EDGE";
 
 DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
 SELECT pagerank('v2',NULL,'e2',NULL,'pg_temp.out2');

diff --git a/src/ports/postgres/modules/graph/test/sssp.sql_in b/src/ports/postgres/modules/graph/test/sssp.sql_in
index 1a01c55..2bd359a 100644
--- a/src/ports/postgres/modules/graph/test/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/test/sssp.sql_in

@@ -158,13 +158,14 @@
 ALTER TABLE vertex RENAME COLUMN dest TO id;
 
 -- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src::bigint, dest::bigint, weight FROM "EDGE";
+
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT (src+992147483647)::bigint as src, (dest+992147483647)::bigint as dest, weight FROM "EDGE";
 
 DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary, pg_temp.out2_path;
-SELECT graph_sssp('v2',NULL,'e2',NULL,0,'pg_temp.out2');
+SELECT graph_sssp('v2',NULL,'e2',NULL,992147483647,'pg_temp.out2');
 SELECT count(*) from pg_temp.out2;
-SELECT graph_sssp_get_path('pg_temp.out2',5,'pg_temp.out2_path');
+SELECT graph_sssp_get_path('pg_temp.out2',992147483652,'pg_temp.out2_path');
 
 -- Test for infinite paths
 DROP TABLE IF EXISTS out, out_summary, out_path;

diff --git a/src/ports/postgres/modules/graph/test/wcc.sql_in b/src/ports/postgres/modules/graph/test/wcc.sql_in
index 6dc5e7f..0917ae2 100644
--- a/src/ports/postgres/modules/graph/test/wcc.sql_in
+++ b/src/ports/postgres/modules/graph/test/wcc.sql_in

@@ -176,10 +176,11 @@
 ALTER TABLE vertex RENAME COLUMN dest TO id;
 
 -- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src_node::bigint, dest_node::bigint FROM "EDGE";
+
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT (src_node+992147483647)::bigint as src, (dest_node+992147483647)::bigint as dest FROM "EDGE";
 
 DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
-SELECT weakly_connected_components('v2',NULL,'e2','src=src_node,dest=dest_node','pg_temp.out2');
+SELECT weakly_connected_components('v2',NULL,'e2',NULL,'pg_temp.out2');
 SELECT count(*) from pg_temp.out2;
 SELECT count(*) from pg_temp.out2_summary;

diff --git a/src/ports/postgres/modules/graph/wcc.py_in b/src/ports/postgres/modules/graph/wcc.py_in
index 28b06f5..c8b5ab2 100644
--- a/src/ports/postgres/modules/graph/wcc.py_in
+++ b/src/ports/postgres/modules/graph/wcc.py_in

@@ -121,7 +121,7 @@
     edge_to_update_where_condition = ''
     edge_inverse_to_update_where_condition = ''
 
-    INT_MAX = 2147483647
+    BIGINT_MAX = 9223372036854775807
     component_id = 'component_id'
     grouping_cols_comma = '' if not grouping_cols else grouping_cols + ','
     comma_grouping_cols = '' if not grouping_cols else ',' + grouping_cols
@@ -168,7 +168,7 @@
         plpy.execute("""
                 CREATE TABLE {newupdate} AS
                 SELECT {subq}.{vertex_id},
-                        CAST({INT_MAX} AS INT) AS {component_id}
+                        CAST({BIGINT_MAX} AS BIGINT) AS {component_id}
                         {select_grouping_cols}
                 FROM {distinct_grp_table} INNER JOIN (
                     SELECT {select_grouping_cols_clause} {src} AS {vertex_id}
@@ -188,7 +188,7 @@
         plpy.execute("""
                 CREATE TEMP TABLE {message} AS
                 SELECT {vertex_id},
-                        CAST({vertex_id} AS INT) AS {component_id}
+                        CAST({vertex_id} AS BIGINT) AS {component_id}
                         {select_grouping_cols_clause}
                 FROM {newupdate}
                 {distribution}
@@ -197,13 +197,13 @@
     else:
         plpy.execute("""
                 CREATE TABLE {newupdate} AS
-                SELECT {vertex_id}, CAST({INT_MAX} AS INT) AS {component_id}
+                SELECT {vertex_id}, CAST({BIGINT_MAX} AS BIGINT) AS {component_id}
                 FROM {vertex_table}
                 {distribution}
             """.format(**locals()))
         plpy.execute("""
                 CREATE TEMP TABLE {message} AS
-                SELECT {vertex_id}, CAST({vertex_id} AS INT) AS {component_id}
+                SELECT {vertex_id}, CAST({vertex_id} AS BIGINT) AS {component_id}
                 FROM {vertex_table}
                 {distribution}
             """.format(**locals()))
@@ -211,7 +211,7 @@
     while nodes_to_update > 0:
         # Look at all the neighbors of a node, and assign the smallest node id
         # among the neighbors as its component_id. The next table starts off
-        # with very high component_id (INT_MAX). The component_id of all nodes
+        # with very high component_id (BIGINT_MAX). The component_id of all nodes
         # which obtain a smaller component_id after looking at its neighbors are
         # updated in the next table. At every iteration update only those nodes
         # whose component_id in the previous iteration are greater than what was

diff --git a/src/ports/postgres/modules/graph/wcc.sql_in b/src/ports/postgres/modules/graph/wcc.sql_in
index bc6ce7a..ad70e3f 100644
--- a/src/ports/postgres/modules/graph/wcc.sql_in
+++ b/src/ports/postgres/modules/graph/wcc.sql_in

@@ -72,7 +72,7 @@
 
 <dt>vertex_id</dt>
 <dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids.  The vertex ids are of type INTEGER with no duplicates.
+vertex ids.  The vertex ids can be of type INTEGER or BIGINT with no duplicates.
 They do not need to be contiguous.</dd>
 
 <dt>edge_table</dt>
@@ -83,10 +83,8 @@
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
-  - src (INTEGER): Name of the column containing the source vertex ids in the edge table.
-                   Default column name is 'src'.
-  - dest (INTEGER): Name of the column containing the destination vertex ids in the edge table.
-                    Default column name is 'dest'.</dd>
+  - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the edge table. Default column name is 'src'.
+  - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids in the edge table. Default column name is 'dest'.</dd>
 
 <dt>out_table</dt>
 <dd>TEXT. Name of the table to store the component ID associated with each vertex.
commit	4daecd6c4b1aa7bf70844d99e8302587111cf2d0	[log] [tgz]
author	Orhan Kislal <okislal@apache.org>	Tue Jul 21 18:04:35 2020 -0400
committer	Orhan Kislal <okislal@pivotal.io>	Wed Aug 05 14:31:01 2020 -0400
tree	1febcd79249f4926cf0fa06bcd5de5d78ace8dac
parent	8adcf41993a67de0ee3048716ec8a0c4d3132417 [diff]