gobblin-utility/src/main/java/org/apache/gobblin/util/ClustersNames.java - gobblin - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.gobblin.util;

 import com.google.common.base.Preconditions;
 import com.google.common.io.Closer;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import java.io.IOException;
 import java.io.InputStream;
 import java.net.*;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Properties;


 /**
  * Converts URLs identifying a Hadoop cluster (e.g. a resource manager URL or a job tracker URL)
  * to a human-readable name.
  *
  * <p>On construction the class loads a resource named {@link #URL_TO_NAME_MAP_RESOURCE_NAME} to
  * get a default mapping. It expects this resource to be in the Java Properties file format. The
  * name of the property is the cluster URL and the value is the human-readable name.
  *
  * <p><b>IMPORTANT:</b> Don't forget to escape colons ":" in the file as those may be interpreted
  * as name/value separators.
  */
 public class ClustersNames {

   public static final String URL_TO_NAME_MAP_RESOURCE_NAME = "GobblinClustersNames.properties";
   private static final Logger LOG = LoggerFactory.getLogger(ClustersNames.class);
   private static final Configuration HADOOP_CONFIGURATION = new Configuration();

   /** Shared instance, created on the first call to {@link #getInstance()}. */
   private static ClustersNames THE_INSTANCE;

   /** Maps a cluster URL, {@code host:port} or host name to its human-readable name. */
   private final Properties urlToNameMap = new Properties();

   /**
    * Loads the default mapping from {@link #URL_TO_NAME_MAP_RESOURCE_NAME}, if that resource exists.
    *
    * <p>The resource is looked up relative to the runtime class first and through the system class
    * loader second. I/O failures while loading or closing the resource are logged and not
    * propagated; in that case the mapping keeps whatever entries were read so far.
    */
   protected ClustersNames() {
     try (Closer closer = Closer.create()) {
       // getResourceAsStream returns null when the resource is absent, hence the null checks.
       InputStream propsInput = closer.register(getClass().getResourceAsStream(URL_TO_NAME_MAP_RESOURCE_NAME));
       if (null == propsInput) {
         propsInput = closer.register(ClassLoader.getSystemResourceAsStream(URL_TO_NAME_MAP_RESOURCE_NAME));
       }
       if (null != propsInput) {
         try {
           this.urlToNameMap.load(propsInput);
           LOG.info("Loaded cluster names map:{}", this.urlToNameMap);
         } catch (IOException e) {
           // The exception is passed twice on purpose: once to fill the placeholder and once,
           // as the trailing argument, so that the stack trace is logged.
           LOG.warn("Unable to load cluster names map: {}", e, e);
         }
       } else {
         LOG.info("no default cluster mapping found");
       }
     } catch (IOException e) {
       LOG.warn("unable to close resource input stream for {}:{}", URL_TO_NAME_MAP_RESOURCE_NAME, e, e);
     }
   }

   /**
    * Returns the human-readable name of the cluster.
    *
    * <p>The mapping is checked for an exact match on {@code clusterUrl} first, then for
    * {@code host:port}, then for the bare host name. If none of them is mapped, the host name
    * from the URL is returned.
    *
    * <p>For a URL with a scheme and a path but no host, {@code "localhost"} is used as the host
    * name. For any other incomplete or invalid URL, the returned name is {@code clusterUrl} with
    * every character other than letters, digits, dashes, underscores and dots replaced by
    * {@code '_'}.
    *
    * @param clusterUrl the URL (or other identifier) of the cluster; may be {@code null}
    * @return the mapped or derived cluster name, or {@code null} if {@code clusterUrl} is {@code null}
    */
   public String getClusterName(String clusterUrl) {
     if (null == clusterUrl) {
       return null;
     }

     List<String> candidates = generateUrlMatchCandidates(clusterUrl);
     for (String candidate : candidates) {
       String name = this.urlToNameMap.getProperty(candidate);
       if (name != null) {
         return name;
       }
     }

     // Nothing is mapped: fall back to the last (least specific) candidate.
     return candidates.get(candidates.size() - 1);
   }

   /**
    * Registers (or replaces) the human-readable name for a cluster URL.
    *
    * @param clusterUrl the key to map; compared verbatim against the lookup candidates
    * @param clusterName the human-readable name to return for that key
    * @throws NullPointerException if either argument is {@code null}
    */
   public void addClusterMapping(String clusterUrl, String clusterName) {
     Preconditions.checkNotNull(clusterUrl, "cluster URL expected");
     Preconditions.checkNotNull(clusterName, "cluster name expected");
     this.urlToNameMap.setProperty(clusterUrl, clusterName);
   }

   /**
    * Registers (or replaces) the human-readable name for a cluster URL, using the string form of
    * {@code clusterUrl} as the key.
    *
    * @param clusterUrl the URL to map
    * @param clusterName the human-readable name to return for that URL
    * @throws NullPointerException if either argument is {@code null}
    */
   public void addClusterMapping(URL clusterUrl, String clusterName) {
     Preconditions.checkNotNull(clusterUrl, "cluster URL expected");
     Preconditions.checkNotNull(clusterName, "cluster name expected");
     this.urlToNameMap.setProperty(clusterUrl.toString(), clusterName);
   }

   /**
    * Builds the lookup keys for a cluster identifier, ordered from most to least specific. The
    * list always holds the identifier itself plus at least one derived entry, so its last element
    * can serve as the default name.
    *
    * @see #getClusterName(String) for logic description.
    */
   private static List<String> generateUrlMatchCandidates(String clusterIdentifier) {
     List<String> candidates = new ArrayList<>();
     candidates.add(clusterIdentifier);

     try {
       URI uri = new URI(clusterIdentifier.trim());
       if (uri.getHost() != null) {
         if (uri.getPort() != -1) {
           candidates.add(uri.getHost() + ":" + uri.getPort());
         }

         // we prefer a config entry with 'host:port', but if it's missing
         // we'll consider just 'host' config entry
         candidates.add(uri.getHost());
       } else if (uri.getScheme() != null && uri.getPath() != null) {
         // we have a scheme and a path, but not the host name
         // assuming local host
         candidates.add("localhost");
       } else {
         candidates.add(getSafeFallbackName(clusterIdentifier));
       }
     } catch (URISyntaxException e) {
       // Not a parseable URI: derive a safe name from the raw identifier instead.
       candidates.add(getSafeFallbackName(clusterIdentifier));
     }

     return candidates;
   }

   /**
    * Replaces every character that is not a word character, a dash or a dot with an underscore.
    */
   private static String getSafeFallbackName(String clusterIdentifier) {
     return clusterIdentifier.replaceAll("[^\\w-\\.]", "_");
   }

   /**
    * Returns the cluster name on which the application is running. Uses a default Hadoop
    * {@link Configuration} to get the address of the resource manager, which is then translated
    * into a human-readable cluster name using {@link #getClusterName(String)}.
    *
    * @see #getClusterName(Configuration)
    */
   public String getClusterName() {
     return getClusterName(HADOOP_CONFIGURATION);
   }

   /**
    * Returns the cluster name on which the application is running. Uses the Hadoop configuration
    * passed in to get the address of the resource manager, which is then translated into a
    * human-readable cluster name using {@link #getClusterName(String)}.
    *
    * <p>
    * <b>MapReduce mode</b> Uses the value for "yarn.resourcemanager.address" from
    * {@link Configuration}. Note that a scheme-less {@code host:port} value is not split into host
    * and port by {@link URI}; unless it has an exact mapping it is returned in its sanitized form
    * (e.g. {@code host_8032}).
    * </p>
    *
    * <p>
    * <b>Standalone mode (outside of hadoop)</b> Uses the host name of
    * {@link InetAddress#getLocalHost()}. This also applies when the resolved name starts with
    * "localhost" or "0.0.0.0". If the local host cannot be resolved, the name computed so far
    * (possibly {@code null}) is returned.
    * </p>
    *
    * <p>
    * Use {@link #getClusterName(String)} if you already have the cluster URL
    * </p>
    *
    * @see #getClusterName()
    * @param conf Hadoop configuration to read the resource manager address from
    * @return the cluster name, or {@code null} if it could not be determined
    */
   public String getClusterName(Configuration conf) {
     // ResourceManager address in Hadoop2
     String clusterIdentifier = conf.get("yarn.resourcemanager.address");
     clusterIdentifier = getClusterName(clusterIdentifier);

     // If job is running outside of Hadoop (Standalone) use hostname
     // If clusterIdentifier is localhost or 0.0.0.0 use hostname
     if (clusterIdentifier == null || StringUtils.startsWithIgnoreCase(clusterIdentifier, "localhost")
         || StringUtils.startsWithIgnoreCase(clusterIdentifier, "0.0.0.0")) {
       try {
         clusterIdentifier = InetAddress.getLocalHost().getHostName();
       } catch (UnknownHostException e) {
         // Best effort: keep the identifier computed above instead of failing the caller,
         // but leave a trace so the degraded name can be explained.
         LOG.warn("Unable to resolve local host name; keeping cluster identifier {}", clusterIdentifier, e);
       }
     }

     return clusterIdentifier;
   }

   /**
    * Returns the shared instance, creating it on first use. Creation is serialized on the
    * {@code ClustersNames.class} monitor.
    */
   public static ClustersNames getInstance() {
     synchronized (ClustersNames.class) {
       if (null == THE_INSTANCE) {
         THE_INSTANCE = new ClustersNames();
       }
       return THE_INSTANCE;
     }
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.gobblin.util;

	import com.google.common.base.Preconditions;
	import com.google.common.io.Closer;
	import org.apache.commons.lang.StringUtils;
	import org.apache.hadoop.conf.Configuration;
	import org.slf4j.Logger;
	import org.slf4j.LoggerFactory;

	import java.io.IOException;
	import java.io.InputStream;
	import java.net.*;
	import java.util.ArrayList;
	import java.util.List;
	import java.util.Properties;


	/**
	* Allows conversion of URLs identifying a Hadoop cluster (e.g. resource manager url or
	* a job tracker URL) to a human-readable name.
	*
	* <p>The class will automatically load a resource named {@link #URL_TO_NAME_MAP_RESOURCE_NAME} to
	* get a default mapping. It expects this resource to be in the Java Properties file format. The
	* name of the property is the cluster URL and the value is the human-readable name.
	*
	* <p><b>IMPORTANT:</b> Don't forget to escape colons ":" in the file as those may be interpreted
	* as name/value separators.
	*/
	public class ClustersNames {

	public static final String URL_TO_NAME_MAP_RESOURCE_NAME = "GobblinClustersNames.properties";
	private static final Logger LOG = LoggerFactory.getLogger(ClustersNames.class);
	private static final Configuration HADOOP_CONFIGURATION = new Configuration();

	private static ClustersNames THE_INSTANCE;

	private Properties urlToNameMap = new Properties();

	protected ClustersNames() {

	try (Closer closer = Closer.create()) {
	InputStream propsInput = closer.register(getClass().getResourceAsStream(URL_TO_NAME_MAP_RESOURCE_NAME));
	if (null == propsInput) {
	propsInput = closer.register(ClassLoader.getSystemResourceAsStream(URL_TO_NAME_MAP_RESOURCE_NAME));
	}
	if (null != propsInput) {
	try {
	this.urlToNameMap.load(propsInput);
	LOG.info("Loaded cluster names map:" + this.urlToNameMap);
	} catch (IOException e) {
	LOG.warn("Unable to load cluster names map: " + e, e);
	}
	} else {
	LOG.info("no default cluster mapping found");
	}
	} catch (IOException e) {
	LOG.warn("unable to close resource input stream for " + URL_TO_NAME_MAP_RESOURCE_NAME + ":" + e, e);
	}
	}

	/**
	* Returns human-readable name of the cluster.
	*
	* Method first checks config for exact cluster url match. If nothing is found,
	* it will also check host:port and just hostname match.
	* If it still could not find a match, hostname from the url will be returned.
	*
	* For incomplete or invalid urls, we'll return a name based on clusterUrl,
	* that will have only alphanumeric characters, dashes, underscores and dots.
	* */
	public String getClusterName(String clusterUrl) {
	if (null == clusterUrl) {
	return null;
	}

	List<String> candidates = generateUrlMatchCandidates(clusterUrl);
	for (String candidate : candidates) {
	String name = this.urlToNameMap.getProperty(candidate);
	if (name != null) {
	return name;
	}
	}

	return candidates.get(candidates.size() - 1);
	}

	public void addClusterMapping(String clusterUrl, String clusterName) {
	Preconditions.checkNotNull(clusterUrl, "cluster URL expected");
	Preconditions.checkNotNull(clusterName, "cluster name expected");
	this.urlToNameMap.put(clusterUrl, clusterName);
	}

	public void addClusterMapping(URL clusterUrl, String clusterName) {
	Preconditions.checkNotNull(clusterUrl, "cluster URL expected");
	Preconditions.checkNotNull(clusterName, "cluster name expected");
	this.urlToNameMap.put(clusterUrl.toString(), clusterName);
	}

	/**
	* @see #getClusterName(String) for logic description.
	*/
	private static List<String> generateUrlMatchCandidates(String clusterIdentifier) {
	ArrayList<String> candidates = new ArrayList<>();
	candidates.add(clusterIdentifier);

	try {
	URI uri = new URI(clusterIdentifier.trim());
	if (uri.getHost() != null) {
	if (uri.getPort() != -1) {
	candidates.add(uri.getHost() + ":" + uri.getPort());
	}

	// we prefer a config entry with 'host:port', but if it's missing
	// we'll consider just 'host' config entry
	candidates.add(uri.getHost());
	} else if (uri.getScheme() != null && uri.getPath() != null) {
	// we have a scheme and a path, but not the host name
	// assuming local host
	candidates.add("localhost");
	} else {
	candidates.add(getSafeFallbackName(clusterIdentifier));
	}
	} catch (URISyntaxException e) {
	candidates.add(getSafeFallbackName(clusterIdentifier));
	}

	return candidates;
	}

	private static String getSafeFallbackName(String clusterIdentifier) {
	return clusterIdentifier.replaceAll("[^\\w-\\.]", "_");
	}

	/**
	*
	* Returns the cluster name on which the application is running. Uses default hadoop {@link Configuration} to get the
	* url of the resourceManager or jobtracker. The URL is then translated into a human readable cluster name using
	* {@link #getClusterName(String)}
	*
	* @see #getClusterName(Configuration)
	*
	*/
	public String getClusterName() {
	return getClusterName(HADOOP_CONFIGURATION);
	}

	/**
	* Returns the cluster name on which the application is running. Uses Hadoop configuration passed in to get the
	* url of the resourceManager or jobtracker. The URL is then translated into a human readable cluster name using
	* {@link #getClusterName(String)}
	*
	* <p>
	* <b>MapReduce mode</b> Uses the value for "yarn.resourcemanager.address" from {@link Configuration} excluding the
	* port number.
	* </p>
	*
	* <p>
	* <b>Standalone mode (outside of hadoop)</b> Uses the Hostname of {@link InetAddress#getLocalHost()}
	* </p>
	*
	* <p>
	* Use {@link #getClusterName(String)} if you already have the cluster URL
	* </p>
	*
	* @see #getClusterName()
	* @param conf Hadoop configuration to use to get resourceManager or jobTracker URLs
	*/
	public String getClusterName(Configuration conf) {
	// ResourceManager address in Hadoop2
	String clusterIdentifier = conf.get("yarn.resourcemanager.address");
	clusterIdentifier = getClusterName(clusterIdentifier);

	// If job is running outside of Hadoop (Standalone) use hostname
	// If clusterIdentifier is localhost or 0.0.0.0 use hostname
	if (clusterIdentifier == null \|\| StringUtils.startsWithIgnoreCase(clusterIdentifier, "localhost")
	\|\| StringUtils.startsWithIgnoreCase(clusterIdentifier, "0.0.0.0")) {
	try {
	clusterIdentifier = InetAddress.getLocalHost().getHostName();
	} catch (UnknownHostException e) {
	// Do nothing. Tag will not be generated
	}
	}

	return clusterIdentifier;
	}

	public static ClustersNames getInstance() {
	synchronized (ClustersNames.class) {
	if (null == THE_INSTANCE) {
	THE_INSTANCE = new ClustersNames();
	}
	return THE_INSTANCE;

	}
	}

	}