| # Copyright 2019 The Google Research Authors. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Utility functions used by generate_graph.py.""" |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import hashlib |
| import itertools |
| |
| import numpy as np |
| |
| |
def gen_is_edge_fn(bits):
    """Build a vectorized edge predicate from an integer bit mask.

    The strictly-upper-triangular cells of the adjacency matrix are numbered
    column by column, and cell number k is an edge when bit k of `bits`
    (counting from the least significant bit) is set. For the bitstring
    FEDCBA on a 4x4 matrix this gives
      [[0, A, B, D],
       [0, 0, C, E],
       [0, 0, 0, F],
       [0, 0, 0, 0]]

    The numbering of a cell does not depend on the matrix size, so the same
    (0-padded) bitstring on a 5x5 matrix gives
      [[0, A, B, D, 0],
       [0, 0, C, E, 0],
       [0, 0, 0, F, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]]

    Args:
      bits: integer which will be interpreted as a bit mask.

    Returns:
      np.vectorize-wrapped function of (x, y) whose result is truthy when the
      mask encodes an edge from x to y and falsy otherwise.
    """

    def is_edge(x, y):
        """Does the bit mask encode an edge from x to y (0-indexed)?"""
        # Only cells strictly above the diagonal can carry an edge.
        if x >= y:
            return 0
        # Column y is preceded by y * (y - 1) / 2 upper-triangular cells;
        # x then selects the row inside that column.
        column_start = y * (y - 1) // 2
        position = x + column_start
        shifted = bits >> position
        return shifted % 2 == 1

    return np.vectorize(is_edge)
| |
| |
def is_full_dag(matrix):
    """Check that every vertex lies on a path from vertex 0 to vertex V-1.

    In other words, the graph has no disconnected or "hanging" vertices. For
    an upper-triangular adjacency matrix it is enough to verify that:
      1) every row except row V-1 has a non-zero entry (only the output
         vertex may lack out-edges), and
      2) every column except column 0 has a non-zero entry (only the input
         vertex may lack in-edges).

    Args:
      matrix: V x V upper-triangular adjacency matrix (np.ndarray).

    Returns:
      True if there are no dangling vertices, False otherwise.
    """
    num_vertices = np.shape(matrix)[0]

    # A non-output vertex whose row is all zeros has no way forward.
    missing_out_edge = np.any(
        np.all(matrix[:num_vertices - 1, :] == 0, axis=1))
    # A non-input vertex whose column is all zeros can never be reached.
    missing_in_edge = np.any(np.all(matrix[:, 1:] == 0, axis=0))

    return not (missing_out_edge or missing_in_edge)
| |
| |
def num_edges(matrix):
    """Count the edges of a graph given its adjacency matrix.

    Args:
      matrix: adjacency matrix; the count is the sum over all of its entries.

    Returns:
      The sum of every entry of `matrix`, as returned by np.sum.
    """
    edge_total = np.sum(matrix)
    return edge_total
| |
| |
def hash_module(matrix, labeling):
    """Computes a graph-invariance MD5 hash of the matrix and label pair.

    Each vertex starts with the MD5 of its (out-degree, in-degree, label)
    triple. The per-vertex hashes are then refined once per vertex in the
    graph: a vertex's new hash is the MD5 of its sorted in-neighbor hashes,
    its sorted out-neighbor hashes and its own previous hash, joined by '|'.
    The fingerprint is the MD5 of the sorted list of final vertex hashes.

    Args:
      matrix: np.ndarray square upper-triangular adjacency matrix.
      labeling: list of int labels of length equal to both dimensions of
        matrix.

    Returns:
      MD5 hash of the matrix and labeling, as a hex digest string.
    """
    num_vertices = np.shape(matrix)[0]
    in_degree = np.sum(matrix, axis=0).tolist()
    out_degree = np.sum(matrix, axis=1).tolist()
    assert len(in_degree) == len(out_degree) == len(labeling)

    def md5_hex(text):
        """Hex MD5 digest of the UTF-8 encoding of `text`."""
        return hashlib.md5(text.encode('utf-8')).hexdigest()

    # The adjacency structure never changes between rounds, so the neighbor
    # index lists are computed once up front.
    vertex_ids = range(num_vertices)
    predecessors = [[w for w in vertex_ids if matrix[w, v]]
                    for v in vertex_ids]
    successors = [[w for w in vertex_ids if matrix[v, w]]
                  for v in vertex_ids]

    signatures = [md5_hex(str(triple))
                  for triple in zip(out_degree, in_degree, labeling)]

    # Running up to the graph diameter is probably enough, but each round is
    # cheap, so one round per vertex is used.
    for _ in vertex_ids:
        signatures = [
            md5_hex('|'.join((
                ''.join(sorted(signatures[w] for w in predecessors[v])),
                ''.join(sorted(signatures[w] for w in successors[v])),
                signatures[v],
            )))
            for v in vertex_ids
        ]

    return md5_hex(str(sorted(signatures)))
| |
| |
def permute_graph(graph, label, permutation):
    """Permutes the graph and labels based on permutation.

    Vertex v of the input graph becomes vertex permutation[v] of the output.

    Args:
      graph: square adjacency matrix (np.ndarray or nested sequence).
      label: list of labels of same length as graph dimensions.
      permutation: a permutation sequence of ints of same length as graph
        dimensions.

    Returns:
      A (new_matrix, new_label) tuple. new_matrix is a boolean np.ndarray in
      which entry [permutation[u], permutation[v]] is True exactly when
      graph[u, v] == 1. new_label is a list with
      new_label[permutation[v]] == label[v].
    """
    size = len(label)

    # inverse_perm[x] is the old vertex that ends up as new vertex x. The
    # sort is stable, so ties (invalid permutations) break by old index.
    inverse_perm = sorted(range(len(permutation)),
                          key=lambda v: permutation[v])

    # The output is sized by `label`; indexing inverse_perm position by
    # position keeps the IndexError when `label` is longer than `permutation`.
    sources = [inverse_perm[i] for i in range(size)]
    gather = np.asarray(sources, dtype=np.intp)

    # One fancy-indexing gather replaces a per-cell Python callback. Unlike
    # np.vectorize without otypes, it also accepts a graph with no vertices.
    new_matrix = np.asarray(graph)[np.ix_(gather, gather)] == 1
    new_label = [label[old] for old in sources]
    return new_matrix, new_label
| |
| |
def is_isomorphic(graph1, graph2):
    """Exhaustively checks if 2 graphs are isomorphic.

    Every vertex permutation of the first graph is tried until one makes both
    its adjacency matrix and its labels equal to those of the second graph.

    Args:
      graph1: (matrix, labels) pair; matrix is converted with np.array.
      graph2: (matrix, labels) pair with the same matrix shape and number of
        labels as graph1.

    Returns:
      True if some permutation maps graph1 onto graph2, False otherwise.
    """
    adjacency_a, labels_a = np.array(graph1[0]), graph1[1]
    adjacency_b, labels_b = np.array(graph2[0]), graph2[1]
    assert np.shape(adjacency_a) == np.shape(adjacency_b)
    assert len(labels_a) == len(labels_b)

    num_vertices = np.shape(adjacency_a)[0]
    # Note: input and output in our constrained graphs always map to
    # themselves, but that is not enforced here.
    for ordering in itertools.permutations(range(0, num_vertices)):
        candidate_matrix, candidate_labels = permute_graph(
            adjacency_a, labels_a, ordering)
        if not np.array_equal(candidate_matrix, adjacency_b):
            continue
        if candidate_labels == labels_b:
            return True

    return False