| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.net; |
| |
| import org.apache.hadoop.classification.InterfaceAudience; |
| import org.apache.hadoop.classification.InterfaceStability; |
| |
| /** |
| * The class extends NetworkTopology to represents a cluster of computer with |
| * a 4-layers hierarchical network topology. |
| * In this network topology, leaves represent data nodes (computers) and inner |
| * nodes represent switches/routers that manage traffic in/out of data centers, |
| * racks or physical host (with virtual switch). |
| * |
| * @see NetworkTopology |
| */ |
| @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) |
| @InterfaceStability.Unstable |
| public class NetworkTopologyWithNodeGroup extends NetworkTopology { |
| |
| public final static String DEFAULT_NODEGROUP = "/default-nodegroup"; |
| |
| public NetworkTopologyWithNodeGroup() { |
| clusterMap = new InnerNodeWithNodeGroup(InnerNode.ROOT); |
| } |
| |
| @Override |
| protected Node getNodeForNetworkLocation(Node node) { |
| // if node only with default rack info, here we need to add default |
| // nodegroup info |
| if (NetworkTopology.DEFAULT_RACK.equals(node.getNetworkLocation())) { |
| node.setNetworkLocation(node.getNetworkLocation() |
| + DEFAULT_NODEGROUP); |
| } |
| Node nodeGroup = getNode(node.getNetworkLocation()); |
| if (nodeGroup == null) { |
| nodeGroup = new InnerNodeWithNodeGroup(node.getNetworkLocation()); |
| } |
| return getNode(nodeGroup.getNetworkLocation()); |
| } |
| |
| @Override |
| public String getRack(String loc) { |
| netlock.readLock().lock(); |
| try { |
| loc = InnerNode.normalize(loc); |
| Node locNode = getNode(loc); |
| if (locNode instanceof InnerNodeWithNodeGroup) { |
| InnerNodeWithNodeGroup node = (InnerNodeWithNodeGroup) locNode; |
| if (node.isRack()) { |
| return loc; |
| } else if (node.isNodeGroup()) { |
| return node.getNetworkLocation(); |
| } else { |
| // may be a data center |
| return null; |
| } |
| } else { |
| // not in cluster map, don't handle it |
| return loc; |
| } |
| } finally { |
| netlock.readLock().unlock(); |
| } |
| } |
| |
| /** |
| * Given a string representation of a node group for a specific network |
| * location |
| * |
| * @param loc |
| * a path-like string representation of a network location |
| * @return a node group string |
| */ |
| public String getNodeGroup(String loc) { |
| netlock.readLock().lock(); |
| try { |
| loc = InnerNode.normalize(loc); |
| Node locNode = getNode(loc); |
| if (locNode instanceof InnerNodeWithNodeGroup) { |
| InnerNodeWithNodeGroup node = (InnerNodeWithNodeGroup) locNode; |
| if (node.isNodeGroup()) { |
| return loc; |
| } else if (node.isRack()) { |
| // not sure the node group for a rack |
| return null; |
| } else { |
| // may be a leaf node |
| if(!(node.getNetworkLocation() == null || |
| node.getNetworkLocation().isEmpty())) { |
| return getNodeGroup(node.getNetworkLocation()); |
| } else { |
| return NodeBase.ROOT; |
| } |
| } |
| } else { |
| // not in cluster map, don't handle it |
| return loc; |
| } |
| } finally { |
| netlock.readLock().unlock(); |
| } |
| } |
| |
| @Override |
| public boolean isOnSameRack( Node node1, Node node2) { |
| if (node1 == null || node2 == null || |
| node1.getParent() == null || node2.getParent() == null) { |
| return false; |
| } |
| |
| netlock.readLock().lock(); |
| try { |
| return isSameParents(node1.getParent(), node2.getParent()); |
| } finally { |
| netlock.readLock().unlock(); |
| } |
| } |
| |
| /** |
| * Check if two nodes are on the same node group (hypervisor) The |
| * assumption here is: each nodes are leaf nodes. |
| * |
| * @param node1 |
| * one node (can be null) |
| * @param node2 |
| * another node (can be null) |
| * @return true if node1 and node2 are on the same node group; false |
| * otherwise |
| * @exception IllegalArgumentException |
| * when either node1 or node2 is null, or node1 or node2 do |
| * not belong to the cluster |
| */ |
| @Override |
| public boolean isOnSameNodeGroup(Node node1, Node node2) { |
| if (node1 == null || node2 == null) { |
| return false; |
| } |
| netlock.readLock().lock(); |
| try { |
| return isSameParents(node1, node2); |
| } finally { |
| netlock.readLock().unlock(); |
| } |
| } |
| |
| /** |
| * Check if network topology is aware of NodeGroup |
| */ |
| @Override |
| public boolean isNodeGroupAware() { |
| return true; |
| } |
| |
| /** Add a leaf node |
| * Update node counter & rack counter if necessary |
| * @param node node to be added; can be null |
| * @exception IllegalArgumentException if add a node to a leave |
| * or node to be added is not a leaf |
| */ |
| @Override |
| public void add(Node node) { |
| if (node==null) return; |
| if( node instanceof InnerNode ) { |
| throw new IllegalArgumentException( |
| "Not allow to add an inner node: "+NodeBase.getPath(node)); |
| } |
| netlock.writeLock().lock(); |
| try { |
| Node rack = null; |
| |
| // if node only with default rack info, here we need to add default |
| // nodegroup info |
| if (NetworkTopology.DEFAULT_RACK.equals(node.getNetworkLocation())) { |
| node.setNetworkLocation(node.getNetworkLocation() + |
| NetworkTopologyWithNodeGroup.DEFAULT_NODEGROUP); |
| } |
| Node nodeGroup = getNode(node.getNetworkLocation()); |
| if (nodeGroup == null) { |
| nodeGroup = new InnerNodeWithNodeGroup(node.getNetworkLocation()); |
| } |
| rack = getNode(nodeGroup.getNetworkLocation()); |
| |
| // rack should be an innerNode and with parent. |
| // note: rack's null parent case is: node's topology only has one layer, |
| // so rack is recognized as "/" and no parent. |
| // This will be recognized as a node with fault topology. |
| if (rack != null && |
| (!(rack instanceof InnerNode) || rack.getParent() == null)) { |
| throw new IllegalArgumentException("Unexpected data node " |
| + node.toString() |
| + " at an illegal network location"); |
| } |
| if (clusterMap.add(node)) { |
| LOG.info("Adding a new node: " + NodeBase.getPath(node)); |
| if (rack == null) { |
| // We only track rack number here |
| incrementRacks(); |
| } |
| } |
| if(LOG.isDebugEnabled()) { |
| LOG.debug("NetworkTopology became:\n" + this.toString()); |
| } |
| } finally { |
| netlock.writeLock().unlock(); |
| } |
| } |
| |
| /** Remove a node |
| * Update node counter and rack counter if necessary |
| * @param node node to be removed; can be null |
| */ |
| @Override |
| public void remove(Node node) { |
| if (node==null) return; |
| if( node instanceof InnerNode ) { |
| throw new IllegalArgumentException( |
| "Not allow to remove an inner node: "+NodeBase.getPath(node)); |
| } |
| LOG.info("Removing a node: "+NodeBase.getPath(node)); |
| netlock.writeLock().lock(); |
| try { |
| if (clusterMap.remove(node)) { |
| Node nodeGroup = getNode(node.getNetworkLocation()); |
| if (nodeGroup == null) { |
| nodeGroup = new InnerNode(node.getNetworkLocation()); |
| } |
| InnerNode rack = (InnerNode)getNode(nodeGroup.getNetworkLocation()); |
| if (rack == null) { |
| numOfRacks--; |
| } |
| } |
| if(LOG.isDebugEnabled()) { |
| LOG.debug("NetworkTopology became:\n" + this.toString()); |
| } |
| } finally { |
| netlock.writeLock().unlock(); |
| } |
| } |
| |
| @Override |
| protected int getWeight(Node reader, Node node) { |
| // 0 is local, 1 is same node group, 2 is same rack, 3 is off rack |
| // Start off by initializing to off rack |
| int weight = 3; |
| if (reader != null) { |
| if (reader.equals(node)) { |
| weight = 0; |
| } else if (isOnSameNodeGroup(reader, node)) { |
| weight = 1; |
| } else if (isOnSameRack(reader, node)) { |
| weight = 2; |
| } |
| } |
| return weight; |
| } |
| |
| /** |
| * Sort nodes array by their distances to <i>reader</i>. |
| * <p/> |
| * This is the same as {@link NetworkTopology#sortByDistance(Node, Node[], |
| * int)} except with a four-level network topology which contains the |
| * additional network distance of a "node group" which is between local and |
| * same rack. |
| * |
| * @param reader Node where data will be read |
| * @param nodes Available replicas with the requested data |
| * @param activeLen Number of active nodes at the front of the array |
| */ |
| @Override |
| public void sortByDistance(Node reader, Node[] nodes, int activeLen) { |
| // If reader is not a datanode (not in NetworkTopology tree), we need to |
| // replace this reader with a sibling leaf node in tree. |
| if (reader != null && !this.contains(reader)) { |
| Node nodeGroup = getNode(reader.getNetworkLocation()); |
| if (nodeGroup != null && nodeGroup instanceof InnerNode) { |
| InnerNode parentNode = (InnerNode) nodeGroup; |
| // replace reader with the first children of its parent in tree |
| reader = parentNode.getLeaf(0, null); |
| } else { |
| return; |
| } |
| } |
| super.sortByDistance(reader, nodes, activeLen); |
| } |
| |
| /** InnerNodeWithNodeGroup represents a switch/router of a data center, rack |
| * or physical host. Different from a leaf node, it has non-null children. |
| */ |
| static class InnerNodeWithNodeGroup extends InnerNode { |
| public InnerNodeWithNodeGroup(String name, String location, |
| InnerNode parent, int level) { |
| super(name, location, parent, level); |
| } |
| |
| public InnerNodeWithNodeGroup(String name, String location) { |
| super(name, location); |
| } |
| |
| public InnerNodeWithNodeGroup(String path) { |
| super(path); |
| } |
| |
| @Override |
| boolean isRack() { |
| // it is node group |
| if (getChildren().isEmpty()) { |
| return false; |
| } |
| |
| Node firstChild = children.get(0); |
| |
| if (firstChild instanceof InnerNode) { |
| Node firstGrandChild = (((InnerNode) firstChild).children).get(0); |
| if (firstGrandChild instanceof InnerNode) { |
| // it is datacenter |
| return false; |
| } else { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Judge if this node represents a node group |
| * |
| * @return true if it has no child or its children are not InnerNodes |
| */ |
| boolean isNodeGroup() { |
| if (children.isEmpty()) { |
| return true; |
| } |
| Node firstChild = children.get(0); |
| if (firstChild instanceof InnerNode) { |
| // it is rack or datacenter |
| return false; |
| } |
| return true; |
| } |
| |
| @Override |
| protected boolean isLeafParent() { |
| return isNodeGroup(); |
| } |
| |
| @Override |
| protected InnerNode createParentNode(String parentName) { |
| return new InnerNodeWithNodeGroup(parentName, getPath(this), this, |
| this.getLevel() + 1); |
| } |
| |
| @Override |
| protected boolean areChildrenLeaves() { |
| return isNodeGroup(); |
| } |
| } |
| } |