runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/graph/Networks.java - beam - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.beam.runners.dataflow.worker.graph;

 import static org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument;
 import static org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkNotNull;

 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Queue;
 import java.util.Set;
 import java.util.function.Function;
 import org.apache.beam.runners.dataflow.worker.graph.Nodes.Node;
 import org.apache.beam.vendor.guava.v20_0.com.google.common.collect.ImmutableList;
 import org.apache.beam.vendor.guava.v20_0.com.google.common.collect.ImmutableSet;
 import org.apache.beam.vendor.guava.v20_0.com.google.common.collect.Maps;
 import org.apache.beam.vendor.guava.v20_0.com.google.common.graph.EndpointPair;
 import org.apache.beam.vendor.guava.v20_0.com.google.common.graph.MutableNetwork;
 import org.apache.beam.vendor.guava.v20_0.com.google.common.graph.Network;

 /** Static utility methods for {@link Network} instances that are directed. */
 public class Networks {
   /**
    * An abstract class that can be extended to apply a function in a type safe manner.
    *
    * <p>Applies {@link #typedApply} to all instances of {@code type}. Otherwise returns the existing
    * {@link Node} unmodified.
    */
   public abstract static class TypeSafeNodeFunction<T extends Node>
       implements Function<Node, Node> {
     private final Class<T> type;

     public TypeSafeNodeFunction(Class<T> type) {
       checkNotNull(type);
       this.type = type;
     }

     @SuppressWarnings("unchecked")
     @Override
     public final Node apply(Node input) {
       if (type.isInstance(input)) {
         return typedApply((T) input);
       }
       return input;
     }

     public abstract Node typedApply(T input);
   }

   /**
    * Applies the {@code function} to all nodes within the {@code network}. Replaces any node which
    * is not {@link #equals(Object)} to the original node, maintaining all existing edges between
    * nodes.
    */
   public static <N, E> void replaceDirectedNetworkNodes(
       MutableNetwork<N, E> network, Function<N, N> function) {
     checkArgument(network.isDirected(), "Only directed networks are supported, given %s", network);
     checkArgument(
         !network.allowsSelfLoops(),
         "Only networks without self loops are supported, given %s",
         network);

     // A map from the existing node to the replacement node
     Map<N, N> oldNodesToNewNodes = new HashMap<>(network.nodes().size());
     for (N currentNode : network.nodes()) {
       N newNode = function.apply(currentNode);
       // Skip updating the network if the old node is equivalent to the new node
       if (!currentNode.equals(newNode)) {
         oldNodesToNewNodes.put(currentNode, newNode);
       }
     }

     // For each replacement, connect up the existing predecessors and successors to the new node
     // and then remove the old node.
     for (Map.Entry<N, N> entry : oldNodesToNewNodes.entrySet()) {
       N oldNode = entry.getKey();
       N newNode = entry.getValue();
       network.addNode(newNode);
       for (N predecessor : ImmutableSet.copyOf(network.predecessors(oldNode))) {
         for (E edge : ImmutableSet.copyOf(network.edgesConnecting(predecessor, oldNode))) {
           network.removeEdge(edge);
           network.addEdge(predecessor, newNode, edge);
         }
       }
       for (N successor : ImmutableSet.copyOf(network.successors(oldNode))) {
         for (E edge : ImmutableSet.copyOf(network.edgesConnecting(oldNode, successor))) {
           network.removeEdge(edge);
           network.addEdge(newNode, successor, edge);
         }
       }
       network.removeNode(oldNode);
     }
   }

   /**
    * Returns the set of nodes that are reachable from {@code startNodes} up to and including {@code
    * endNodes}. Node B is defined as reachable from node A if there exists a path (a sequence of
    * adjacent outgoing edges) starting at node A and ending at node B which does not pass through
    * any node in {@code endNodes}. Note that a node is always reachable from itself via a
    * zero-length path.
    *
    * <p>This is a "snapshot" based on the current topology of the {@code network}, rather than a
    * live view of the set of nodes reachable from {@code node}. In other words, the returned {@link
    * Set} will not be updated after modifications to the {@code network}.
    */
   public static <N, E> Set<N> reachableNodes(
       Network<N, E> network, Set<N> startNodes, Set<N> endNodes) {
     Set<N> visitedNodes = new HashSet<>();
     Queue<N> queuedNodes = new ArrayDeque<>();
     queuedNodes.addAll(startNodes);
     // Perform a breadth-first traversal rooted at the input node.
     while (!queuedNodes.isEmpty()) {
       N currentNode = queuedNodes.remove();
       // If we have already visited this node or it is a terminal node than do not add any
       // successors.
       if (!visitedNodes.add(currentNode) || endNodes.contains(currentNode)) {
         continue;
       }
       queuedNodes.addAll(network.successors(currentNode));
     }
     return visitedNodes;
   }

   /** Returns a set of nodes sorted in topological order. */
   public static <N, E> Set<N> topologicalOrder(Network<N, E> network) {
     // TODO: Upgrade Guava and remove this method if topological sorting becomes
     // supported externally or remove this comment if its not going to be supported externally.

     checkArgument(network.isDirected(), "Only directed networks are supported, given %s", network);
     checkArgument(
         !network.allowsSelfLoops(),
         "Only networks without self loops are supported, given %s",
         network);

     // Linked hashset will prevent duplicates from appearing and will maintain insertion order.
     LinkedHashSet<N> nodes = new LinkedHashSet<>(network.nodes().size());
     Queue<N> processingOrder = new ArrayDeque<>();
     // Add all the roots
     for (N node : network.nodes()) {
       if (network.inDegree(node) == 0) {
         processingOrder.add(node);
       }
     }

     while (!processingOrder.isEmpty()) {
       N current = processingOrder.remove();
       // If all predecessors have already been added, then we can add this node, otherwise
       // we need to add the node to the back of the processing queue.
       if (nodes.containsAll(network.predecessors(current))) {
         nodes.add(current);
         processingOrder.addAll(network.successors(current));
       } else {
         processingOrder.add(current);
       }
     }

     return nodes;
   }

   public static <N, E> String toDot(Network<N, E> network) {
     StringBuilder builder = new StringBuilder();
     builder.append("digraph network {\n");
     Map<N, String> nodeName = Maps.newIdentityHashMap();
     network.nodes().forEach(node -> nodeName.put(node, "n" + nodeName.size()));
     for (Entry<N, String> nodeEntry : nodeName.entrySet()) {
       builder.append(
           String.format(
               "  %s [fontname=\"Courier New\" label=\"%s\"];%n",
               nodeEntry.getValue(), escapeDot(nodeEntry.getKey().toString())));
     }
     for (E edge : network.edges()) {
       EndpointPair<N> endpoints = network.incidentNodes(edge);
       builder.append(
           String.format(
               "  %s -> %s [fontname=\"Courier New\" label=\"%s\"];%n",
               nodeName.get(endpoints.source()),
               nodeName.get(endpoints.target()),
               escapeDot(edge.toString())));
     }
     builder.append("}");
     return builder.toString();
   }

   private static String escapeDot(String s) {
     return s.replace("\\", "\\\\")
         .replace("\"", "\\\"")
         // http://www.graphviz.org/doc/info/attrs.html#k:escString
         // The escape sequences "\n", "\l" and "\r" divide the label into lines, centered,
         // left-justified, and right-justified, respectively.
         .replace("\n", "\\l");
   }

   /**
    * Returns a list of all distinct paths from roots of the network to leaves. The list can be in
    * arbitrary orders and can contain duplicate paths if there are multiple edges from two nodes.
    */
   public static <NodeT, EdgeT> List<List<NodeT>> allPathsFromRootsToLeaves(
       Network<NodeT, EdgeT> network) {
     ArrayDeque<List<NodeT>> paths = new ArrayDeque<>();
     // Populate the list with all roots
     for (NodeT node : network.nodes()) {
       if (network.inDegree(node) == 0) {
         paths.add(ImmutableList.of(node));
       }
     }

     List<List<NodeT>> distinctPathsFromRootsToLeaves = new ArrayList<>();
     while (!paths.isEmpty()) {
       List<NodeT> path = paths.removeFirst();
       NodeT lastNode = path.get(path.size() - 1);
       if (network.outDegree(lastNode) == 0) {
         distinctPathsFromRootsToLeaves.add(new ArrayList<>(path));
       } else {
         for (EdgeT edge : network.outEdges(lastNode)) {
           paths.addFirst(
               ImmutableList.<NodeT>builder()
                   .addAll(path)
                   .add(network.incidentNodes(edge).target())
                   .build());
         }
       }
     }
     return distinctPathsFromRootsToLeaves;
   }

   // Hide visibility to prevent instantiation
   private Networks() {}
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.beam.runners.dataflow.worker.graph;

	import static org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument;
	import static org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkNotNull;

	import java.util.ArrayDeque;
	import java.util.ArrayList;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.LinkedHashSet;
	import java.util.List;
	import java.util.Map;
	import java.util.Map.Entry;
	import java.util.Queue;
	import java.util.Set;
	import java.util.function.Function;
	import org.apache.beam.runners.dataflow.worker.graph.Nodes.Node;
	import org.apache.beam.vendor.guava.v20_0.com.google.common.collect.ImmutableList;
	import org.apache.beam.vendor.guava.v20_0.com.google.common.collect.ImmutableSet;
	import org.apache.beam.vendor.guava.v20_0.com.google.common.collect.Maps;
	import org.apache.beam.vendor.guava.v20_0.com.google.common.graph.EndpointPair;
	import org.apache.beam.vendor.guava.v20_0.com.google.common.graph.MutableNetwork;
	import org.apache.beam.vendor.guava.v20_0.com.google.common.graph.Network;

	/** Static utility methods for {@link Network} instances that are directed. */
	public class Networks {
	/**
	* An abstract class that can be extended to apply a function in a type safe manner.
	*
	* <p>Applies {@link #typedApply} to all instances of {@code type}. Otherwise returns the existing
	* {@link Node} unmodified.
	*/
	public abstract static class TypeSafeNodeFunction<T extends Node>
	implements Function<Node, Node> {
	private final Class<T> type;

	public TypeSafeNodeFunction(Class<T> type) {
	checkNotNull(type);
	this.type = type;
	}

	@SuppressWarnings("unchecked")
	@Override
	public final Node apply(Node input) {
	if (type.isInstance(input)) {
	return typedApply((T) input);
	}
	return input;
	}

	public abstract Node typedApply(T input);
	}

	/**
	* Applies the {@code function} to all nodes within the {@code network}. Replaces any node which
	* is not {@link #equals(Object)} to the original node, maintaining all existing edges between
	* nodes.
	*/
	public static <N, E> void replaceDirectedNetworkNodes(
	MutableNetwork<N, E> network, Function<N, N> function) {
	checkArgument(network.isDirected(), "Only directed networks are supported, given %s", network);
	checkArgument(
	!network.allowsSelfLoops(),
	"Only networks without self loops are supported, given %s",
	network);

	// A map from the existing node to the replacement node
	Map<N, N> oldNodesToNewNodes = new HashMap<>(network.nodes().size());
	for (N currentNode : network.nodes()) {
	N newNode = function.apply(currentNode);
	// Skip updating the network if the old node is equivalent to the new node
	if (!currentNode.equals(newNode)) {
	oldNodesToNewNodes.put(currentNode, newNode);
	}
	}

	// For each replacement, connect up the existing predecessors and successors to the new node
	// and then remove the old node.
	for (Map.Entry<N, N> entry : oldNodesToNewNodes.entrySet()) {
	N oldNode = entry.getKey();
	N newNode = entry.getValue();
	network.addNode(newNode);
	for (N predecessor : ImmutableSet.copyOf(network.predecessors(oldNode))) {
	for (E edge : ImmutableSet.copyOf(network.edgesConnecting(predecessor, oldNode))) {
	network.removeEdge(edge);
	network.addEdge(predecessor, newNode, edge);
	}
	}
	for (N successor : ImmutableSet.copyOf(network.successors(oldNode))) {
	for (E edge : ImmutableSet.copyOf(network.edgesConnecting(oldNode, successor))) {
	network.removeEdge(edge);
	network.addEdge(newNode, successor, edge);
	}
	}
	network.removeNode(oldNode);
	}
	}

	/**
	* Returns the set of nodes that are reachable from {@code startNodes} up to and including {@code
	* endNodes}. Node B is defined as reachable from node A if there exists a path (a sequence of
	* adjacent outgoing edges) starting at node A and ending at node B which does not pass through
	* any node in {@code endNodes}. Note that a node is always reachable from itself via a
	* zero-length path.
	*
	* <p>This is a "snapshot" based on the current topology of the {@code network}, rather than a
	* live view of the set of nodes reachable from {@code node}. In other words, the returned {@link
	* Set} will not be updated after modifications to the {@code network}.
	*/
	public static <N, E> Set<N> reachableNodes(
	Network<N, E> network, Set<N> startNodes, Set<N> endNodes) {
	Set<N> visitedNodes = new HashSet<>();
	Queue<N> queuedNodes = new ArrayDeque<>();
	queuedNodes.addAll(startNodes);
	// Perform a breadth-first traversal rooted at the input node.
	while (!queuedNodes.isEmpty()) {
	N currentNode = queuedNodes.remove();
	// If we have already visited this node or it is a terminal node than do not add any
	// successors.
	if (!visitedNodes.add(currentNode) \|\| endNodes.contains(currentNode)) {
	continue;
	}
	queuedNodes.addAll(network.successors(currentNode));
	}
	return visitedNodes;
	}

	/** Returns a set of nodes sorted in topological order. */
	public static <N, E> Set<N> topologicalOrder(Network<N, E> network) {
	// TODO: Upgrade Guava and remove this method if topological sorting becomes
	// supported externally or remove this comment if its not going to be supported externally.

	checkArgument(network.isDirected(), "Only directed networks are supported, given %s", network);
	checkArgument(
	!network.allowsSelfLoops(),
	"Only networks without self loops are supported, given %s",
	network);

	// Linked hashset will prevent duplicates from appearing and will maintain insertion order.
	LinkedHashSet<N> nodes = new LinkedHashSet<>(network.nodes().size());
	Queue<N> processingOrder = new ArrayDeque<>();
	// Add all the roots
	for (N node : network.nodes()) {
	if (network.inDegree(node) == 0) {
	processingOrder.add(node);
	}
	}

	while (!processingOrder.isEmpty()) {
	N current = processingOrder.remove();
	// If all predecessors have already been added, then we can add this node, otherwise
	// we need to add the node to the back of the processing queue.
	if (nodes.containsAll(network.predecessors(current))) {
	nodes.add(current);
	processingOrder.addAll(network.successors(current));
	} else {
	processingOrder.add(current);
	}
	}

	return nodes;
	}

	public static <N, E> String toDot(Network<N, E> network) {
	StringBuilder builder = new StringBuilder();
	builder.append("digraph network {\n");
	Map<N, String> nodeName = Maps.newIdentityHashMap();
	network.nodes().forEach(node -> nodeName.put(node, "n" + nodeName.size()));
	for (Entry<N, String> nodeEntry : nodeName.entrySet()) {
	builder.append(
	String.format(
	" %s [fontname=\"Courier New\" label=\"%s\"];%n",
	nodeEntry.getValue(), escapeDot(nodeEntry.getKey().toString())));
	}
	for (E edge : network.edges()) {
	EndpointPair<N> endpoints = network.incidentNodes(edge);
	builder.append(
	String.format(
	" %s -> %s [fontname=\"Courier New\" label=\"%s\"];%n",
	nodeName.get(endpoints.source()),
	nodeName.get(endpoints.target()),
	escapeDot(edge.toString())));
	}
	builder.append("}");
	return builder.toString();
	}

	private static String escapeDot(String s) {
	return s.replace("\\", "\\\\")
	.replace("\"", "\\\"")
	// http://www.graphviz.org/doc/info/attrs.html#k:escString
	// The escape sequences "\n", "\l" and "\r" divide the label into lines, centered,
	// left-justified, and right-justified, respectively.
	.replace("\n", "\\l");
	}

	/**
	* Returns a list of all distinct paths from roots of the network to leaves. The list can be in
	* arbitrary orders and can contain duplicate paths if there are multiple edges from two nodes.
	*/
	public static <NodeT, EdgeT> List<List<NodeT>> allPathsFromRootsToLeaves(
	Network<NodeT, EdgeT> network) {
	ArrayDeque<List<NodeT>> paths = new ArrayDeque<>();
	// Populate the list with all roots
	for (NodeT node : network.nodes()) {
	if (network.inDegree(node) == 0) {
	paths.add(ImmutableList.of(node));
	}
	}

	List<List<NodeT>> distinctPathsFromRootsToLeaves = new ArrayList<>();
	while (!paths.isEmpty()) {
	List<NodeT> path = paths.removeFirst();
	NodeT lastNode = path.get(path.size() - 1);
	if (network.outDegree(lastNode) == 0) {
	distinctPathsFromRootsToLeaves.add(new ArrayList<>(path));
	} else {
	for (EdgeT edge : network.outEdges(lastNode)) {
	paths.addFirst(
	ImmutableList.<NodeT>builder()
	.addAll(path)
	.add(network.incidentNodes(edge).target())
	.build());
	}
	}
	}
	return distinctPathsFromRootsToLeaves;
	}

	// Hide visibility to prevent instantiation
	private Networks() {}
	}