Graph: Fix handling of bigint columns
JIRA: MADLIB-1444
Graph modules cast the vertex ids to INTEGER in a few spots, which breaks
graphs whose vertex ids do not fit in a 32-bit integer.
This commit fixes the issue by using the BIGINT type in those places instead.
diff --git a/src/ports/postgres/modules/graph/apsp.sql_in b/src/ports/postgres/modules/graph/apsp.sql_in
index 66d1f74..893cd79 100644
--- a/src/ports/postgres/modules/graph/apsp.sql_in
+++ b/src/ports/postgres/modules/graph/apsp.sql_in
@@ -78,7 +78,7 @@
<dt>vertex_id</dt>
<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids. The vertex ids are of type BIGINT with no duplicates.
+vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates.
They do not need to be contiguous.</dd>
<dt>edge_table</dt>
@@ -90,9 +90,9 @@
<dd>TEXT. A comma-delimited string containing multiple named arguments of
the form "name=value". The following parameters are supported for
this string argument:
- - src (BIGINT): Name of the column containing the source vertex ids in the
+ - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the
edge table. Default column name is 'src'.
- - dest (BIGINT): Name of the column containing the destination vertex ids
+ - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids
in the edge table. Default column name is 'dest'.
- weight (FLOAT8): Name of the column containing the edge weights in the
edge table. Default column name is 'weight'.</dd>
@@ -142,10 +142,10 @@
<dd>TEXT. Name of the table that contains the APSP output.</dd>
<dt>source_vertex</dt>
-<dd>BIGINT. The vertex that will be the source of the desired path.</dd>
+<dd>INTEGER or BIGINT. The vertex that will be the source of the desired path.</dd>
<dt>dest_vertex</dt>
-<dd>BIGINT. The vertex that will be the destination of the desired path.</dd>
+<dd>INTEGER or BIGINT. The vertex that will be the destination of the desired path.</dd>
<dt>path_table</dt>
<dd>TEXT. Name of the output table that contains the path.
diff --git a/src/ports/postgres/modules/graph/bfs.py_in b/src/ports/postgres/modules/graph/bfs.py_in
index e802aac..70bff3f 100644
--- a/src/ports/postgres/modules/graph/bfs.py_in
+++ b/src/ports/postgres/modules/graph/bfs.py_in
@@ -59,8 +59,8 @@
"""Graph BFS: Invalid value for directed ({0}), must be boolean.""".
format(directed))
- _assert(isinstance(source_vertex,int),
- """Graph BFS: Source vertex {source_vertex} has to be an integer.""".
+ _assert(isinstance(source_vertex,int) or isinstance(source_vertex,long),
+ """Graph BFS: Source vertex {source_vertex} has to be an integer or bigint.""".
format(**locals()))
src_exists = plpy.execute("""
SELECT * FROM {vertex_table} WHERE {vertex_id}={source_vertex}
@@ -217,9 +217,9 @@
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
- source_vertex INTEGER,
+ source_vertex BIGINT,
out_table TEXT,
- max_distance INTEGER,
+ max_distance BIGINT,
directed BOOLEAN,
grouping_cols TEXT
)
diff --git a/src/ports/postgres/modules/graph/bfs.sql_in b/src/ports/postgres/modules/graph/bfs.sql_in
index ac945a1..f9507d9 100644
--- a/src/ports/postgres/modules/graph/bfs.sql_in
+++ b/src/ports/postgres/modules/graph/bfs.sql_in
@@ -68,7 +68,7 @@
<dt>vertex_id</dt>
<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids. The vertex ids are of type INTEGER with no duplicates.
+vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates.
They do not need to be contiguous.</dd>
<dt>edge_table</dt>
@@ -82,14 +82,14 @@
<dd>TEXT. A comma-delimited string containing multiple named arguments of
the form "name=value". The following parameters are supported for
this string argument:
- - src (INTEGER): Name of the column containing the source vertex ids in the edge table.
+ - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the edge table.
Default column name is 'src'.
(This is not to be confused with the 'source_vertex' argument passed to the BFS function.)
- - dest (INTEGER): Name of the column containing the destination vertex ids in
+ - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids in
the edge table. Default column name is 'dest'.
<dt>source_vertex</dt>
-<dd>INTEGER. The source vertex id for the algorithm to start. This vertex id must
+<dd>INTEGER or BIGINT. The source vertex id for the algorithm to start. This vertex id must
exist in the 'vertex_id' column of 'vertex_table'.</dd>
<dt>out_table</dt>
@@ -401,7 +401,7 @@
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
- source_vertex INT,
+ source_vertex BIGINT,
out_table TEXT,
max_distance INT,
directed BOOLEAN,
@@ -418,7 +418,7 @@
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
- source_vertex INT,
+ source_vertex BIGINT,
out_table TEXT,
max_distance INT,
directed BOOLEAN
@@ -434,7 +434,7 @@
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
- source_vertex INT,
+ source_vertex BIGINT,
out_table TEXT,
max_distance INT
) RETURNS VOID AS $$
@@ -449,7 +449,7 @@
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
- source_vertex INT,
+ source_vertex BIGINT,
out_table TEXT
) RETURNS VOID AS $$
SELECT MADLIB_SCHEMA.graph_bfs($1, $2, $3, $4, $5, $6, NULL, NULL, NULL);
diff --git a/src/ports/postgres/modules/graph/hits.py_in b/src/ports/postgres/modules/graph/hits.py_in
index 1283070..ad8e748 100644
--- a/src/ports/postgres/modules/graph/hits.py_in
+++ b/src/ports/postgres/modules/graph/hits.py_in
@@ -173,7 +173,7 @@
plpy.execute("""
CREATE TABLE {out_table} (
{grouping_cols_for_create_table_comma}
- {vertex_id} INTEGER,
+ {vertex_id} BIGINT,
authority DOUBLE PRECISION,
hub DOUBLE PRECISION
)
diff --git a/src/ports/postgres/modules/graph/hits.sql_in b/src/ports/postgres/modules/graph/hits.sql_in
index 83f838d..d2d6cfc 100644
--- a/src/ports/postgres/modules/graph/hits.sql_in
+++ b/src/ports/postgres/modules/graph/hits.sql_in
@@ -70,7 +70,7 @@
<dt>vertex_id</dt>
<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
- vertex ids. The vertex ids are of type INTEGER with no duplicates. They
+ vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates. They
do not need to be contiguous.</dd>
<dt>edge_table</dt>
@@ -81,9 +81,9 @@
<dd>TEXT. A comma-delimited string containing multiple named arguments of
the form "name=value". The following parameters are supported for
this string argument:
- - src (INTEGER): Name of the column containing the source vertex ids in
+ - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in
the edge table. Default column name is 'src'.
- - dest (INTEGER): Name of the column containing the destination vertex
+ - dest (INTEGER or BIGINT): Name of the column containing the destination vertex
ids in the edge table. Default column name is 'dest'.</dd>
<dt>out_table</dt>
diff --git a/src/ports/postgres/modules/graph/measures.py_in b/src/ports/postgres/modules/graph/measures.py_in
index 3ec07e9..902c0d2 100644
--- a/src/ports/postgres/modules/graph/measures.py_in
+++ b/src/ports/postgres/modules/graph/measures.py_in
@@ -264,7 +264,7 @@
{grouping_cols_comma}
{e.weight} AS diameter,
{self._madlib}.matrix_agg(
- ARRAY[{e.src}, {e.dest}]::double precision[])::integer[]
+ ARRAY[{e.src}, {e.dest}]::double precision[])::BIGINT[]
AS diameter_end_vertices
FROM
{apsp_table} JOIN
diff --git a/src/ports/postgres/modules/graph/measures.sql_in b/src/ports/postgres/modules/graph/measures.sql_in
index 0879832..50afaf3 100644
--- a/src/ports/postgres/modules/graph/measures.sql_in
+++ b/src/ports/postgres/modules/graph/measures.sql_in
@@ -643,7 +643,7 @@
<dt>vertex_id</dt>
<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids. The vertex ids are of type INTEGER with no duplicates.
+vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates.
They do not need to be contiguous.</dd>
<dt>edge_table</dt>
@@ -655,9 +655,9 @@
<dd>TEXT. A comma-delimited string containing multiple named arguments of
the form "name=value". The following parameters are supported for
this string argument:
- - src (INTEGER): Name of the column containing the source vertex ids in the
+ - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the
edge table. Default column name is 'src'.
- - dest (INTEGER): Name of the column containing the destination vertex ids
+ - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids
in the edge table. Default column name is 'dest'.
- weight (FLOAT8): Name of the column containing the edge weights in the
edge table. Default column name is 'weight'.</dd>
diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in
index e732675..830432b 100644
--- a/src/ports/postgres/modules/graph/pagerank.py_in
+++ b/src/ports/postgres/modules/graph/pagerank.py_in
@@ -68,6 +68,7 @@
"PageRank: Invalid damping factor value ({0}), must be between 0 and 1.".
format(damping_factor))
+
# Validate against the given set of nodes for Personalized Page Rank
if personalization_vertices:
grouping_cols = get_table_qualified_col_str(
@@ -76,6 +77,9 @@
if grouping_cols_list else ''
src = edge_params["src"]
dest = edge_params["dest"]
+ input_personalization_vertices_length = len(personalization_vertices)
+
+ personalization_vertices_str = ','.join([str(i) for i in personalization_vertices])
# Get a list which has the number of personalization nodes of each group
vertices_count_list_by_group = plpy.execute("""
@@ -84,11 +88,10 @@
RIGHT JOIN {edge_table}
ON ({vertex_table}.{vertex_id} = {edge_table}.{src}
OR {vertex_table}.{vertex_id} = {edge_table}.{dest})
- AND {vertex_table}.{vertex_id} = ANY(ARRAY{personalization_vertices})
+ AND {vertex_table}.{vertex_id} = ANY(ARRAY[{personalization_vertices_str}])
{group_by_clause}
""".format(**locals()))
- input_personalization_vertices_length = len(personalization_vertices)
# The number of personalization nodes for every group should be equal to
# the number given by input personalization_vertices list. Otherwise,
@@ -277,12 +280,15 @@
# factor for computing the random_prob
where_clause_ppr = ''
if personalization_vertices:
+ personalization_vertices_str = ','.join(
+ [str(i) for i in personalization_vertices])
where_clause_ppr = """
- where __vertices__ = ANY(ARRAY{personalization_vertices})
+ where __vertices__ = ANY(ARRAY[{personalization_vertices_str}])
""".format(**locals())
random_prob_grp = 1.0 - damping_factor
init_prob_grp = 1.0 / total_ppr_nodes
else:
+ personalization_vertices_str = ''
random_prob_grp = """
{rand_damp}/COUNT(__vertices__)::DOUBLE PRECISION
""".format(**locals())
@@ -315,7 +321,7 @@
if personalization_vertices:
init_prob_grp_ppr = 1.0 / total_ppr_nodes
init_pr = """
- CASE when __vertices__ = ANY(ARRAY{personalization_vertices})
+ CASE when __vertices__ = ANY(ARRAY[{personalization_vertices_str}])
THEN {init_prob_grp_ppr} ELSE 0 END
""".format(**locals())
@@ -387,7 +393,7 @@
plpy.execute("""
CREATE TABLE {out_table} (
{grouping_cols_clause},
- {vertex_id} INTEGER,
+ {vertex_id} BIGINT,
pagerank DOUBLE PRECISION
)
""".format(**locals()))
@@ -716,15 +722,18 @@
ppr_init_value = 1.0 / total_ppr_nodes
prob_value = 1.0 - damping_factor
dest = edge_params["dest"]
+
+ personalization_vertices_str = ','.join([str(i) for i in personalization_vertices])
+
# In case of PPR, Assign the Random jump probability to the personalization_vertices only.
# For rest of the nodes, Random jump probability will be zero.
ppr_random_prob_clause = """
- CASE WHEN {edge_temp_table}.{dest} = ANY(ARRAY{personalization_vertices})
+ CASE WHEN {edge_temp_table}.{dest} = ANY(ARRAY[{personalization_vertices_str}])
THEN {prob_value} ELSE 0 END
""".format(**locals())
ppr_init_prob_clause = """
- CASE WHEN {vertex_id} = ANY(ARRAY{personalization_vertices})
+ CASE WHEN {vertex_id} = ANY(ARRAY[{personalization_vertices_str}])
THEN {ppr_init_value} ELSE 0 END
""".format(**locals())
return(total_ppr_nodes, ppr_random_prob_clause, ppr_init_prob_clause)
@@ -754,7 +763,7 @@
-- N is number of vertices in the graph)
grouping_col TEXT, -- Comma separated column names to group on
-- (DEFAULT = NULL, no grouping)
- personalization_vertices ARRAY OF INTEGER, -- A comma seperated list of vertices
+ personalization_vertices ARRAY OF BIGINT, -- A comma seperated list of vertices
or nodes for personalized page rank.
""") + """
diff --git a/src/ports/postgres/modules/graph/pagerank.sql_in b/src/ports/postgres/modules/graph/pagerank.sql_in
index cd239bd..8338fb6 100644
--- a/src/ports/postgres/modules/graph/pagerank.sql_in
+++ b/src/ports/postgres/modules/graph/pagerank.sql_in
@@ -77,7 +77,7 @@
<dt>vertex_id</dt>
<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids. The vertex ids are of type INTEGER with no duplicates.
+vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates.
They do not need to be contiguous.</dd>
<dt>edge_table</dt>
@@ -88,9 +88,9 @@
<dd>TEXT. A comma-delimited string containing multiple named arguments of
the form "name=value". The following parameters are supported for
this string argument:
- - src (INTEGER): Name of the column containing the source vertex ids in the edge table.
+ - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the edge table.
Default column name is 'src'.
- - dest (INTEGER): Name of the column containing the destination vertex ids in the edge table.
+ - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids in the edge table.
Default column name is 'dest'.</dd>
<dt>out_table</dt>
@@ -127,7 +127,7 @@
@note Expressions are not currently supported for 'grouping_cols'.</dd>
<dt> personalization_vertices (optional)</dt>
-<dd>INTEGER[], default: NULL. A comma separated list of vertices or nodes
+<dd>INTEGER[] or BIGINT[], default: NULL. A comma separated list of vertices or nodes
for personalized PageRank. When this parameter is provided, personalized PageRank
will run. In the absence of this parameter, regular PageRank will run.
</dl>
@@ -349,7 +349,7 @@
max_iter INTEGER,
threshold FLOAT8,
grouping_cols VARCHAR,
- personalization_vertices INTEGER[]
+ personalization_vertices BIGINT[]
) RETURNS VOID AS $$
PythonFunction(graph, pagerank, pagerank)
$$ LANGUAGE plpythonu VOLATILE
diff --git a/src/ports/postgres/modules/graph/sssp.py_in b/src/ports/postgres/modules/graph/sssp.py_in
index 26ddf88..9e813b0 100644
--- a/src/ports/postgres/modules/graph/sssp.py_in
+++ b/src/ports/postgres/modules/graph/sssp.py_in
@@ -141,7 +141,7 @@
w_type = get_expr_type(weight, edge_table).lower()
init_w = INT_MAX
- if w_type in ['real', 'double precision', 'float8']:
+ if w_type in ['real', 'double precision', 'float8', 'bigint']:
init_w = INFINITY
# We keep a table of every vertex, the minimum cost to that destination
@@ -162,7 +162,7 @@
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
- source_vertex INTEGER,
+ source_vertex BIGINT,
out_table TEXT,
grouping_cols TEXT)
""".format(**locals()))
diff --git a/src/ports/postgres/modules/graph/sssp.sql_in b/src/ports/postgres/modules/graph/sssp.sql_in
index fd05fcf..195759d 100644
--- a/src/ports/postgres/modules/graph/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/sssp.sql_in
@@ -68,7 +68,7 @@
<dt>vertex_id</dt>
<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids. The vertex ids are of type BIGINT with no duplicates.
+vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates.
They do not need to be contiguous.</dd>
<dt>edge_table</dt>
@@ -80,12 +80,12 @@
<dd>TEXT. A comma-delimited string containing multiple named arguments of
the form "name=value". The following parameters are supported for
this string argument:
- - src (BIGINT): Name of the column containing the source vertex ids in the edge table. Default column name is 'src'.
- - dest (BIGINT): Name of the column containing the destination vertex ids in the edge table. Default column name is 'dest'.
+ - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the edge table. Default column name is 'src'.
+ - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids in the edge table. Default column name is 'dest'.
- weight (FLOAT8): Name of the column containing the edge weights in the edge table. Default column name is 'weight'.</dd>
<dt>source_vertex</dt>
-<dd>BIGINT. The source vertex id for the algorithm to start. This vertex id must
+<dd>INTEGER or BIGINT. The source vertex id for the algorithm to start. This vertex id must
exist in the 'vertex_id' column of 'vertex_table'.</dd>
<dt>out_table</dt>
@@ -125,7 +125,7 @@
<dd>TEXT. Name of the table that contains the SSSP output.</dd>
<dt>dest_vertex</dt>
-<dd>BIGINT. The vertex that will be the destination of the desired path.</dd>
+<dd>INTEGER or BIGINT. The vertex that will be the destination of the desired path.</dd>
<dt>path_table</dt>
<dd>TEXT. Name of the output table that contains the path.
diff --git a/src/ports/postgres/modules/graph/test/apsp.sql_in b/src/ports/postgres/modules/graph/test/apsp.sql_in
index 4f399cc..05cd00d 100644
--- a/src/ports/postgres/modules/graph/test/apsp.sql_in
+++ b/src/ports/postgres/modules/graph/test/apsp.sql_in
@@ -113,12 +113,12 @@
ALTER TABLE vertex RENAME COLUMN "DEST" TO id;
-- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src::bigint, "DEST"::bigint, weight FROM "EDGE";
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT (src+992147483647)::bigint as src, ("DEST"+992147483647)::bigint as dest, weight FROM "EDGE";
DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary, pg_temp.out2_path;
-SELECT graph_apsp('v2',NULL,'e2','dest="DEST"','pg_temp.out2');
-SELECT graph_apsp_get_path('pg_temp.out2',0,7,'pg_temp.out2_path');
+SELECT graph_apsp('v2',NULL,'e2',NULL,'pg_temp.out2');
+SELECT graph_apsp_get_path('pg_temp.out2',992147483647,992147483652,'pg_temp.out2_path');
-- Test for infinite paths
DROP TABLE IF EXISTS out, out_summary, out_path;
diff --git a/src/ports/postgres/modules/graph/test/bfs.sql_in b/src/ports/postgres/modules/graph/test/bfs.sql_in
index 46f97fb..38f7aa1 100644
--- a/src/ports/postgres/modules/graph/test/bfs.sql_in
+++ b/src/ports/postgres/modules/graph/test/bfs.sql_in
@@ -291,11 +291,12 @@
ALTER TABLE vertex RENAME COLUMN dest TO id;
-- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT "SRC"::bigint, dest::bigint, weight FROM "EDGE";
+
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT ("SRC"+992147483647)::bigint as src, (dest+992147483647)::bigint as dest FROM "EDGE";
DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
-SELECT graph_bfs('v2',NULL,'e2','src="SRC"',3,'pg_temp.out2');
+SELECT graph_bfs('v2',NULL,'e2',NULL,992147483650,'pg_temp.out2');
SELECT count(*) from pg_temp.out2;
SELECT * from pg_temp.out2_summary;
diff --git a/src/ports/postgres/modules/graph/test/hits.sql_in b/src/ports/postgres/modules/graph/test/hits.sql_in
index 2ef4d63..b15778a 100644
--- a/src/ports/postgres/modules/graph/test/hits.sql_in
+++ b/src/ports/postgres/modules/graph/test/hits.sql_in
@@ -173,8 +173,9 @@
ALTER TABLE vertex RENAME COLUMN dest TO id;
-- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src::bigint, dest::bigint FROM edge;
+
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT (src+992147483647)::bigint as src, (dest+992147483647)::bigint as dest FROM edge;
DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
SELECT hits('v2',NULL,'e2',NULL,'pg_temp.out2');
diff --git a/src/ports/postgres/modules/graph/test/measures.sql_in b/src/ports/postgres/modules/graph/test/measures.sql_in
index d3d0081..14dbab4 100644
--- a/src/ports/postgres/modules/graph/test/measures.sql_in
+++ b/src/ports/postgres/modules/graph/test/measures.sql_in
@@ -198,8 +198,10 @@
-- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src_id::bigint, "DEST_ID"::bigint, edge_weight FROM "EDGE";
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS
+SELECT (src_id+992147483647)::bigint as src_id, ("DEST_ID"+992147483647)::bigint as "DEST_ID", edge_weight
+FROM "EDGE";
DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
SELECT graph_apsp('v2', -- Vertex table
diff --git a/src/ports/postgres/modules/graph/test/pagerank.sql_in b/src/ports/postgres/modules/graph/test/pagerank.sql_in
index 22794e9..870ed00 100644
--- a/src/ports/postgres/modules/graph/test/pagerank.sql_in
+++ b/src/ports/postgres/modules/graph/test/pagerank.sql_in
@@ -209,8 +209,9 @@
ALTER TABLE vertex RENAME COLUMN dest TO id;
-- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src::bigint, dest::bigint FROM "EDGE";
+
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT (src+992147483647)::bigint as src, (dest+992147483647)::bigint as dest FROM "EDGE";
DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
SELECT pagerank('v2',NULL,'e2',NULL,'pg_temp.out2');
diff --git a/src/ports/postgres/modules/graph/test/sssp.sql_in b/src/ports/postgres/modules/graph/test/sssp.sql_in
index 1a01c55..2bd359a 100644
--- a/src/ports/postgres/modules/graph/test/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/test/sssp.sql_in
@@ -158,13 +158,14 @@
ALTER TABLE vertex RENAME COLUMN dest TO id;
-- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src::bigint, dest::bigint, weight FROM "EDGE";
+
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT (src+992147483647)::bigint as src, (dest+992147483647)::bigint as dest, weight FROM "EDGE";
DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary, pg_temp.out2_path;
-SELECT graph_sssp('v2',NULL,'e2',NULL,0,'pg_temp.out2');
+SELECT graph_sssp('v2',NULL,'e2',NULL,992147483647,'pg_temp.out2');
SELECT count(*) from pg_temp.out2;
-SELECT graph_sssp_get_path('pg_temp.out2',5,'pg_temp.out2_path');
+SELECT graph_sssp_get_path('pg_temp.out2',992147483652,'pg_temp.out2_path');
-- Test for infinite paths
DROP TABLE IF EXISTS out, out_summary, out_path;
diff --git a/src/ports/postgres/modules/graph/test/wcc.sql_in b/src/ports/postgres/modules/graph/test/wcc.sql_in
index 6dc5e7f..0917ae2 100644
--- a/src/ports/postgres/modules/graph/test/wcc.sql_in
+++ b/src/ports/postgres/modules/graph/test/wcc.sql_in
@@ -176,10 +176,11 @@
ALTER TABLE vertex RENAME COLUMN dest TO id;
-- Test for bigint columns
-CREATE TABLE v2 AS SELECT id::bigint FROM vertex;
-CREATE TABLE e2 AS SELECT src_node::bigint, dest_node::bigint FROM "EDGE";
+
+CREATE TABLE v2 AS SELECT (id+992147483647)::bigint as id FROM vertex;
+CREATE TABLE e2 AS SELECT (src_node+992147483647)::bigint as src, (dest_node+992147483647)::bigint as dest FROM "EDGE";
DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
-SELECT weakly_connected_components('v2',NULL,'e2','src=src_node,dest=dest_node','pg_temp.out2');
+SELECT weakly_connected_components('v2',NULL,'e2',NULL,'pg_temp.out2');
SELECT count(*) from pg_temp.out2;
SELECT count(*) from pg_temp.out2_summary;
diff --git a/src/ports/postgres/modules/graph/wcc.py_in b/src/ports/postgres/modules/graph/wcc.py_in
index 28b06f5..c8b5ab2 100644
--- a/src/ports/postgres/modules/graph/wcc.py_in
+++ b/src/ports/postgres/modules/graph/wcc.py_in
@@ -121,7 +121,7 @@
edge_to_update_where_condition = ''
edge_inverse_to_update_where_condition = ''
- INT_MAX = 2147483647
+ BIGINT_MAX = 9223372036854775807
component_id = 'component_id'
grouping_cols_comma = '' if not grouping_cols else grouping_cols + ','
comma_grouping_cols = '' if not grouping_cols else ',' + grouping_cols
@@ -168,7 +168,7 @@
plpy.execute("""
CREATE TABLE {newupdate} AS
SELECT {subq}.{vertex_id},
- CAST({INT_MAX} AS INT) AS {component_id}
+ CAST({BIGINT_MAX} AS BIGINT) AS {component_id}
{select_grouping_cols}
FROM {distinct_grp_table} INNER JOIN (
SELECT {select_grouping_cols_clause} {src} AS {vertex_id}
@@ -188,7 +188,7 @@
plpy.execute("""
CREATE TEMP TABLE {message} AS
SELECT {vertex_id},
- CAST({vertex_id} AS INT) AS {component_id}
+ CAST({vertex_id} AS BIGINT) AS {component_id}
{select_grouping_cols_clause}
FROM {newupdate}
{distribution}
@@ -197,13 +197,13 @@
else:
plpy.execute("""
CREATE TABLE {newupdate} AS
- SELECT {vertex_id}, CAST({INT_MAX} AS INT) AS {component_id}
+ SELECT {vertex_id}, CAST({BIGINT_MAX} AS BIGINT) AS {component_id}
FROM {vertex_table}
{distribution}
""".format(**locals()))
plpy.execute("""
CREATE TEMP TABLE {message} AS
- SELECT {vertex_id}, CAST({vertex_id} AS INT) AS {component_id}
+ SELECT {vertex_id}, CAST({vertex_id} AS BIGINT) AS {component_id}
FROM {vertex_table}
{distribution}
""".format(**locals()))
@@ -211,7 +211,7 @@
while nodes_to_update > 0:
# Look at all the neighbors of a node, and assign the smallest node id
# among the neighbors as its component_id. The next table starts off
- # with very high component_id (INT_MAX). The component_id of all nodes
+ # with very high component_id (BIGINT_MAX). The component_id of all nodes
# which obtain a smaller component_id after looking at its neighbors are
# updated in the next table. At every iteration update only those nodes
# whose component_id in the previous iteration are greater than what was
diff --git a/src/ports/postgres/modules/graph/wcc.sql_in b/src/ports/postgres/modules/graph/wcc.sql_in
index bc6ce7a..ad70e3f 100644
--- a/src/ports/postgres/modules/graph/wcc.sql_in
+++ b/src/ports/postgres/modules/graph/wcc.sql_in
@@ -72,7 +72,7 @@
<dt>vertex_id</dt>
<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
-vertex ids. The vertex ids are of type INTEGER with no duplicates.
+vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates.
They do not need to be contiguous.</dd>
<dt>edge_table</dt>
@@ -83,10 +83,8 @@
<dd>TEXT. A comma-delimited string containing multiple named arguments of
the form "name=value". The following parameters are supported for
this string argument:
- - src (INTEGER): Name of the column containing the source vertex ids in the edge table.
- Default column name is 'src'.
- - dest (INTEGER): Name of the column containing the destination vertex ids in the edge table.
- Default column name is 'dest'.</dd>
+ - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the edge table. Default column name is 'src'.
+ - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids in the edge table. Default column name is 'dest'.</dd>
<dt>out_table</dt>
<dd>TEXT. Name of the table to store the component ID associated with each vertex.