src/org/apache/pig/impl/util/JarManager.java - pig - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.pig.impl.util;

 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URL;
 import java.net.URLClassLoader;
 import java.net.URLDecoder;
 import java.util.ArrayList;
 import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.jar.JarEntry;
 import java.util.jar.JarOutputStream;

 import org.antlr.runtime.CommonTokenStream;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;
 import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;
 import org.apache.pig.impl.PigContext;
 import org.apache.tools.bzip2r.BZip2Constants;
 import org.joda.time.DateTime;

 import com.google.common.collect.Multimaps;

 import dk.brics.automaton.Automaton;

 public class JarManager {

     /** Logger shared by all static helpers of this class; it is never reassigned. */
     private static final Log log = LogFactory.getLog(JarManager.class);

     private static enum DefaultPigPackages {

         PIG(PigMapReduce.class),
         BZIP2R(BZip2Constants.class),
         AUTOMATON(Automaton.class),
         ANTLR(CommonTokenStream.class),
         GUAVA(Multimaps.class),
         JODATIME(DateTime.class);

         private final Class pkgClass;

         DefaultPigPackages(Class pkgClass) {
             this.pkgClass = pkgClass;
         }

         public Class getPkgClass() {
             return pkgClass;
         }
     }

     /**
      * Builds a jar from the script files registered on {@code pigContext} and renames it
      * after the MD5 digest of its bytes.
      * <p>
      * The temporary file is registered for deletion on exit before it is renamed.
      * NOTE(review): the renamed file is not registered for deletion - confirm that this
      * is intended.
      *
      * @param pigContext
      *            the context whose script files are packaged
      * @return the jar, named {@code PigScriptUDF-<md5>.jar} and located next to the
      *         temporary file, or {@code null} when no entry was written
      * @throws IOException
      *             if the jar cannot be written or read back, or if it could not be moved
      *             to its digest-based name and no file with that name exists
      */
     public static File createPigScriptUDFJar(PigContext pigContext) throws IOException {
         File scriptUDFJarFile = File.createTempFile("PigScriptUDF", ".jar");
         // ensure the scriptUDFJarFile is deleted on exit
         scriptUDFJarFile.deleteOnExit();
         HashMap<String, String> contents = new HashMap<String, String>();
         FileOutputStream fos = new FileOutputStream(scriptUDFJarFile);
         try {
             createPigScriptUDFJar(fos, pigContext, contents);
         } finally {
             // The helper closes the stream when it completes normally, and closing a
             // FileOutputStream twice is harmless. When the helper throws, this call is
             // what releases the file handle.
             fos.close();
         }

         if (contents.isEmpty()) {
             return null;
         }

         String md5;
         FileInputStream fis = new FileInputStream(scriptUDFJarFile);
         try {
             md5 = org.apache.commons.codec.digest.DigestUtils.md5Hex(fis);
         } finally {
             fis.close();
         }

         File newScriptUDFJarFile = new File(scriptUDFJarFile.getParent(), "PigScriptUDF-" + md5 + ".jar");
         // renameTo reports failure through its return value only. A failed rename is
         // tolerated when a file with the digest-based name is already present, because
         // the name is derived from the content; otherwise the caller would receive a
         // path to a file that does not exist.
         if (!scriptUDFJarFile.renameTo(newScriptUDFJarFile) && !newScriptUDFJarFile.isFile()) {
             throw new IOException("Cannot rename " + scriptUDFJarFile + " to " + newScriptUDFJarFile);
         }
         return newScriptUDFJarFile;
     }

     /**
      * Writes the script files of {@code pigContext} as jar entries onto {@code os}.
      * <p>
      * Entries come first from {@code pigContext.scriptFiles} (the path is used as the
      * entry name) and then from {@code pigContext.getScriptFiles()} (the map key is used
      * as the entry name). The stream is always closed before this method returns or
      * throws.
      *
      * @param os
      *            the destination stream; closed by this method
      * @param pigContext
      *            the context providing the script files
      * @param contents
      *            receives the name of every entry written; also used to skip names that
      *            were already added
      * @throws IOException
      *             if a script can be found neither on disk nor through the Pig class
      *             loader, or if writing fails
      */
     private static void createPigScriptUDFJar(OutputStream os, PigContext pigContext, HashMap<String, String> contents) throws IOException {
         boolean finished = false;
         try {
             JarOutputStream jarOutputStream = new JarOutputStream(os);
             for (String path: pigContext.scriptFiles) {
                 log.debug("Adding entry " + path + " to job jar" );
                 addScriptEntry(jarOutputStream, path, new File(path), path, contents);
             }
             for (Map.Entry<String, File> entry : pigContext.getScriptFiles().entrySet()) {
                 log.debug("Adding entry " + entry.getKey() + " to job jar" );
                 File scriptFile = entry.getValue();
                 addScriptEntry(jarOutputStream, entry.getKey(), scriptFile, scriptFile.getPath(), contents);
             }
             if (!contents.isEmpty()) {
                 jarOutputStream.close();
             } else {
                 // Nothing was written, so only the raw stream is closed.
                 // NOTE(review): presumably this avoids finishing a jar that has no
                 // entries - confirm.
                 os.close();
             }
             finished = true;
         } finally {
             if (!finished) {
                 // Release the stream on the failure path without masking the original
                 // exception.
                 try {
                     os.close();
                 } catch (IOException ignored) {
                     // the exception already in flight is the one worth reporting
                 }
             }
         }
     }

     /**
      * Opens one script, from disk if the file exists and otherwise through the Pig class
      * loader, and copies it into the jar under {@code entryName}.
      *
      * @param jar
      *            the jar being assembled
      * @param entryName
      *            the name of the jar entry
      * @param file
      *            the candidate file on disk; its last-modified time stamps the entry
      * @param resourcePath
      *            the path used for the class loader lookup and in the error message
      * @param contents
      *            the names already present in the jar
      * @throws IOException
      *             if the script cannot be found or copying fails
      */
     private static void addScriptEntry(JarOutputStream jar, String entryName, File file, String resourcePath,
             Map<String, String> contents) throws IOException {
         InputStream stream;
         if (file.exists()) {
             stream = new FileInputStream(file);
         } else {
             stream = PigContext.getClassLoader().getResourceAsStream(resourcePath);
         }
         if (stream == null) {
             throw new IOException("Cannot find " + resourcePath);
         }
         try {
             addStream(jar, entryName, stream, contents, file.lastModified());
         } finally {
             stream.close();
         }
     }

     /**
      * Builds a class loader whose search path is the optional {@code jarFile} followed by
      * every element of {@code pigContext.extraJars}, with the loader of
      * {@link PigMapReduce} as its parent.
      *
      * @param jarFile
      *            path of the jar placed first on the search path; may be {@code null}, in
      *            which case only the extra jars are used
      * @param pigContext
      *            the context supplying {@code extraJars}
      * @return the new class loader
      * @throws MalformedURLException
      *             if one of the assembled {@code file:} locations is not a valid URL
      */
     static ClassLoader createCl(String jarFile, PigContext pigContext) throws MalformedURLException {
         final int offset = (jarFile != null) ? 1 : 0;
         final URL[] searchPath = new URL[pigContext.extraJars.size() + offset];
         if (offset == 1) {
             searchPath[0] = new URL("file:" + jarFile);
         }
         int slot = offset;
         // NOTE(review): every element is concatenated after "file:" via its string form;
         // if the elements are already URLs this doubles the scheme - confirm the element
         // type of extraJars.
         for (Object extraJar : pigContext.extraJars) {
             searchPath[slot] = new URL("file:" + extraJar);
             slot++;
         }
         ClassLoader parent = PigMapReduce.class.getClassLoader();
         return new URLClassLoader(searchPath, parent);
     }

      /**
      * Copies a stream into a jar as a new entry, unless an entry of that name was
      * already added.
      *
      * @param os
      *            the jar stream that receives the entry
      * @param name
      *            the name of the jar entry
      * @param is
      *            the data to copy; it is read to its end but not closed here
      * @param contents
      *            the names already written to the jar; {@code name} is recorded here
      *            before the entry is written, and a name that is already present makes
      *            this call a no-op
      * @param timestamp
      *            modification time stored on the entry
      * @throws IOException
      *             if reading the stream or writing the entry fails
      */
     private static void addStream(JarOutputStream os, String name, InputStream is, Map<String, String> contents,
             long timestamp)
             throws IOException {
         if (contents.containsKey(name)) {
             return;
         }
         contents.put(name, "");
         JarEntry entry = new JarEntry(name);
         entry.setTime(timestamp);
         os.putNextEntry(entry);
         byte[] buffer = new byte[4096];
         int rc;
         // End of stream is signalled by -1; a read of zero bytes must not end the copy.
         while ((rc = is.read(buffer)) != -1) {
             os.write(buffer, 0, rc);
         }
         // Finish the entry here so a write failure is reported for this entry rather
         // than when the next entry is started or the jar is closed.
         os.closeEntry();
     }

     /**
      * Resolves the jars that contain the representative classes listed in
      * {@code DefaultPigPackages}.
      * <p>
      * The Guava entry is left out when {@code HadoopShims.isHadoopYARN()} is true
      * (NOTE(review): presumably because the cluster already provides it - confirm).
      * Classes for which no jar can be found are logged and skipped, so the result never
      * contains {@code null}.
      *
      * @return the distinct jar paths, in enum declaration order
      */
     public static List<String> getDefaultJars() {
         List<String> defaultJars = new ArrayList<String>();
         for (DefaultPigPackages pkgToSend : DefaultPigPackages.values()) {
             if (pkgToSend == DefaultPigPackages.GUAVA && HadoopShims.isHadoopYARN()) {
                 continue; //Skip
             }
             String jar = findContainingJar(pkgToSend.getPkgClass());
             if (jar == null) {
                 // findContainingJar returns null when the class is not served from a
                 // jar; a null path would only fail later in the caller.
                 log.warn("Could not find jar for class " + pkgToSend.getPkgClass());
                 continue;
             }
             if (!defaultJars.contains(jar)) {
                 defaultJars.add(jar);
             }
         }
         return defaultJars;
     }

     /**
      * Locates a jar that holds the {@code .class} resource of the given class.
      * <p>
      * When the Pig class loader is a {@link URLClassLoader}, its own URLs are searched
      * first; if that yields nothing, or the loader is of another kind, the regular
      * {@code getResources} lookup is used. The first match with the {@code jar} protocol
      * wins, which is not necessarily the first match on the class path.
      *
      * @param my_class
      *            the class to find
      * @return the decoded file path of a jar containing the class, or {@code null} if no
      *         match with the {@code jar} protocol exists
      * @throws RuntimeException
      *             wrapping any {@link IOException} raised during the lookup or decoding
      */
     public static String findContainingJar(Class my_class) {
         ClassLoader pigLoader = PigContext.getClassLoader();
         String resourceName = my_class.getName().replaceAll("\\.", "/") + ".class";
         try {
             Enumeration<URL> matches = null;
             // Registered jars take precedence.
             if (pigLoader instanceof URLClassLoader) {
                 matches = ((URLClassLoader) pigLoader).findResources(resourceName);
             }
             // Otherwise fall back to the full delegation chain.
             if (matches == null || !matches.hasMoreElements()) {
                 matches = pigLoader.getResources(resourceName);
             }
             while (matches.hasMoreElements()) {
                 URL location = matches.nextElement();
                 if (!"jar".equals(location.getProtocol())) {
                     continue;
                 }
                 String jarPath = location.getPath();
                 if (jarPath.startsWith("file:")) {
                     jarPath = jarPath.substring("file:".length());
                 }
                 // URLDecoder implements form decoding, which turns '+' into a space.
                 // A file path encodes spaces as "%20" or not at all, so each literal
                 // '+' is escaped first and survives the decode unchanged.
                 String plusSafe = jarPath.replaceAll("\\+", "%2B");
                 String decoded = URLDecoder.decode(plusSafe, "UTF-8");
                 // Strip the "!/entry" suffix so that only the jar path remains.
                 return decoded.replaceAll("!.*$", "");
             }
         } catch (IOException e) {
             throw new RuntimeException(e);
         }
         return null;
     }

     /**
      * Merges the jars that contain the given classes into the {@code tmpjars} entry of
      * the job's configuration.
      * <p>
      * Values already present under {@code tmpjars} are kept. The property is left
      * untouched when the merged set is empty.
      *
      * @param job
      *            the job whose configuration is updated
      * @param classes
      *            the classes whose jars should be added
      * @throws IOException
      *             if the local file system cannot be obtained
      */
     public static void addDependencyJars(Job job, Class<?>... classes)
             throws IOException {
         Configuration jobConf = job.getConfiguration();
         FileSystem localFs = FileSystem.getLocal(jobConf);
         Set<String> tmpJars = new HashSet<String>();
         tmpJars.addAll(jobConf.getStringCollection("tmpjars"));
         addQualifiedJarsName(localFs, tmpJars, classes);
         if (!tmpJars.isEmpty()) {
             String[] jarArray = tmpJars.toArray(new String[0]);
             jobConf.set("tmpjars", StringUtils.arrayToString(jarArray));
         }
     }

     /**
      * Resolves the jar of each given class and adds its path, qualified against the file
      * system's URI and working directory, to {@code jars}.
      * <p>
      * A class for which no jar is found is reported with a warning and skipped.
      *
      * @param fs
      *            the file system providing the URI and working directory
      * @param jars
      *            the set that receives the qualified jar paths
      * @param classes
      *            the classes to look up
      */
     private static void addQualifiedJarsName(FileSystem fs, Set<String> jars, Class<?>... classes) {
         final URI baseUri = fs.getUri();
         final Path cwd = fs.getWorkingDirectory();
         for (int i = 0; i < classes.length; i++) {
             Class<?> candidate = classes[i];
             String containingJar = findContainingJar(candidate);
             if (containingJar != null) {
                 Path qualified = new Path(containingJar).makeQualified(baseUri, cwd);
                 jars.add(qualified.toString());
             } else {
                 log.warn("Could not find jar for class " + candidate);
             }
         }
     }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.pig.impl.util;

	import java.io.File;
	import java.io.FileInputStream;
	import java.io.FileOutputStream;
	import java.io.IOException;
	import java.io.InputStream;
	import java.io.OutputStream;
	import java.net.MalformedURLException;
	import java.net.URI;
	import java.net.URL;
	import java.net.URLClassLoader;
	import java.net.URLDecoder;
	import java.util.ArrayList;
	import java.util.Enumeration;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.List;
	import java.util.Map;
	import java.util.Set;
	import java.util.jar.JarEntry;
	import java.util.jar.JarOutputStream;

	import org.antlr.runtime.CommonTokenStream;
	import org.apache.commons.logging.Log;
	import org.apache.commons.logging.LogFactory;
	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.fs.FileSystem;
	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.mapreduce.Job;
	import org.apache.hadoop.util.StringUtils;
	import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;
	import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;
	import org.apache.pig.impl.PigContext;
	import org.apache.tools.bzip2r.BZip2Constants;
	import org.joda.time.DateTime;

	import com.google.common.collect.Multimaps;

	import dk.brics.automaton.Automaton;

	public class JarManager {

	/** Logger shared by all static helpers of this class; it is never reassigned. */
	private static final Log log = LogFactory.getLog(JarManager.class);

	private static enum DefaultPigPackages {

	PIG(PigMapReduce.class),
	BZIP2R(BZip2Constants.class),
	AUTOMATON(Automaton.class),
	ANTLR(CommonTokenStream.class),
	GUAVA(Multimaps.class),
	JODATIME(DateTime.class);

	private final Class pkgClass;

	DefaultPigPackages(Class pkgClass) {
	this.pkgClass = pkgClass;
	}

	public Class getPkgClass() {
	return pkgClass;
	}
	}

	/**
	 * Builds a jar from the script files registered on {@code pigContext} and renames it
	 * after the MD5 digest of its bytes.
	 * <p>
	 * The temporary file is registered for deletion on exit before it is renamed.
	 * NOTE(review): the renamed file is not registered for deletion - confirm that this
	 * is intended.
	 *
	 * @param pigContext
	 *            the context whose script files are packaged
	 * @return the jar, named {@code PigScriptUDF-<md5>.jar} and located next to the
	 *         temporary file, or {@code null} when no entry was written
	 * @throws IOException
	 *             if the jar cannot be written or read back, or if it could not be moved
	 *             to its digest-based name and no file with that name exists
	 */
	public static File createPigScriptUDFJar(PigContext pigContext) throws IOException {
	    File scriptUDFJarFile = File.createTempFile("PigScriptUDF", ".jar");
	    // ensure the scriptUDFJarFile is deleted on exit
	    scriptUDFJarFile.deleteOnExit();
	    HashMap<String, String> contents = new HashMap<String, String>();
	    FileOutputStream fos = new FileOutputStream(scriptUDFJarFile);
	    try {
	        createPigScriptUDFJar(fos, pigContext, contents);
	    } finally {
	        // The helper closes the stream when it completes normally, and closing a
	        // FileOutputStream twice is harmless. When the helper throws, this call is
	        // what releases the file handle.
	        fos.close();
	    }

	    if (contents.isEmpty()) {
	        return null;
	    }

	    String md5;
	    FileInputStream fis = new FileInputStream(scriptUDFJarFile);
	    try {
	        md5 = org.apache.commons.codec.digest.DigestUtils.md5Hex(fis);
	    } finally {
	        fis.close();
	    }

	    File newScriptUDFJarFile = new File(scriptUDFJarFile.getParent(), "PigScriptUDF-" + md5 + ".jar");
	    // renameTo reports failure through its return value only. A failed rename is
	    // tolerated when a file with the digest-based name is already present, because
	    // the name is derived from the content; otherwise the caller would receive a
	    // path to a file that does not exist.
	    if (!scriptUDFJarFile.renameTo(newScriptUDFJarFile) && !newScriptUDFJarFile.isFile()) {
	        throw new IOException("Cannot rename " + scriptUDFJarFile + " to " + newScriptUDFJarFile);
	    }
	    return newScriptUDFJarFile;
	}

	/**
	 * Writes the script files of {@code pigContext} as jar entries onto {@code os}.
	 * <p>
	 * Entries come first from {@code pigContext.scriptFiles} (the path is used as the
	 * entry name) and then from {@code pigContext.getScriptFiles()} (the map key is used
	 * as the entry name). The stream is always closed before this method returns or
	 * throws.
	 *
	 * @param os
	 *            the destination stream; closed by this method
	 * @param pigContext
	 *            the context providing the script files
	 * @param contents
	 *            receives the name of every entry written; also used to skip names that
	 *            were already added
	 * @throws IOException
	 *             if a script can be found neither on disk nor through the Pig class
	 *             loader, or if writing fails
	 */
	private static void createPigScriptUDFJar(OutputStream os, PigContext pigContext, HashMap<String, String> contents) throws IOException {
	    boolean finished = false;
	    try {
	        JarOutputStream jarOutputStream = new JarOutputStream(os);
	        for (String path: pigContext.scriptFiles) {
	            log.debug("Adding entry " + path + " to job jar" );
	            addScriptEntry(jarOutputStream, path, new File(path), path, contents);
	        }
	        for (Map.Entry<String, File> entry : pigContext.getScriptFiles().entrySet()) {
	            log.debug("Adding entry " + entry.getKey() + " to job jar" );
	            File scriptFile = entry.getValue();
	            addScriptEntry(jarOutputStream, entry.getKey(), scriptFile, scriptFile.getPath(), contents);
	        }
	        if (!contents.isEmpty()) {
	            jarOutputStream.close();
	        } else {
	            // Nothing was written, so only the raw stream is closed.
	            // NOTE(review): presumably this avoids finishing a jar that has no
	            // entries - confirm.
	            os.close();
	        }
	        finished = true;
	    } finally {
	        if (!finished) {
	            // Release the stream on the failure path without masking the original
	            // exception.
	            try {
	                os.close();
	            } catch (IOException ignored) {
	                // the exception already in flight is the one worth reporting
	            }
	        }
	    }
	}

	/**
	 * Opens one script, from disk if the file exists and otherwise through the Pig class
	 * loader, and copies it into the jar under {@code entryName}.
	 *
	 * @param jar
	 *            the jar being assembled
	 * @param entryName
	 *            the name of the jar entry
	 * @param file
	 *            the candidate file on disk; its last-modified time stamps the entry
	 * @param resourcePath
	 *            the path used for the class loader lookup and in the error message
	 * @param contents
	 *            the names already present in the jar
	 * @throws IOException
	 *             if the script cannot be found or copying fails
	 */
	private static void addScriptEntry(JarOutputStream jar, String entryName, File file, String resourcePath,
	        Map<String, String> contents) throws IOException {
	    InputStream stream;
	    if (file.exists()) {
	        stream = new FileInputStream(file);
	    } else {
	        stream = PigContext.getClassLoader().getResourceAsStream(resourcePath);
	    }
	    if (stream == null) {
	        throw new IOException("Cannot find " + resourcePath);
	    }
	    try {
	        addStream(jar, entryName, stream, contents, file.lastModified());
	    } finally {
	        stream.close();
	    }
	}

	/**
	 * Builds a class loader whose search path is the optional {@code jarFile} followed by
	 * every element of {@code pigContext.extraJars}, with the loader of
	 * {@link PigMapReduce} as its parent.
	 *
	 * @param jarFile
	 *            path of the jar placed first on the search path; may be {@code null}, in
	 *            which case only the extra jars are used
	 * @param pigContext
	 *            the context supplying {@code extraJars}
	 * @return the new class loader
	 * @throws MalformedURLException
	 *             if one of the assembled {@code file:} locations is not a valid URL
	 */
	static ClassLoader createCl(String jarFile, PigContext pigContext) throws MalformedURLException {
	    final int offset = (jarFile != null) ? 1 : 0;
	    final URL[] searchPath = new URL[pigContext.extraJars.size() + offset];
	    if (offset == 1) {
	        searchPath[0] = new URL("file:" + jarFile);
	    }
	    int slot = offset;
	    // NOTE(review): every element is concatenated after "file:" via its string form;
	    // if the elements are already URLs this doubles the scheme - confirm the element
	    // type of extraJars.
	    for (Object extraJar : pigContext.extraJars) {
	        searchPath[slot] = new URL("file:" + extraJar);
	        slot++;
	    }
	    ClassLoader parent = PigMapReduce.class.getClassLoader();
	    return new URLClassLoader(searchPath, parent);
	}

	/**
	 * Copies a stream into a jar as a new entry, unless an entry of that name was
	 * already added.
	 *
	 * @param os
	 *            the jar stream that receives the entry
	 * @param name
	 *            the name of the jar entry
	 * @param is
	 *            the data to copy; it is read to its end but not closed here
	 * @param contents
	 *            the names already written to the jar; {@code name} is recorded here
	 *            before the entry is written, and a name that is already present makes
	 *            this call a no-op
	 * @param timestamp
	 *            modification time stored on the entry
	 * @throws IOException
	 *             if reading the stream or writing the entry fails
	 */
	private static void addStream(JarOutputStream os, String name, InputStream is, Map<String, String> contents,
	        long timestamp)
	        throws IOException {
	    if (contents.containsKey(name)) {
	        return;
	    }
	    contents.put(name, "");
	    JarEntry entry = new JarEntry(name);
	    entry.setTime(timestamp);
	    os.putNextEntry(entry);
	    byte[] buffer = new byte[4096];
	    int rc;
	    // End of stream is signalled by -1; a read of zero bytes must not end the copy.
	    while ((rc = is.read(buffer)) != -1) {
	        os.write(buffer, 0, rc);
	    }
	    // Finish the entry here so a write failure is reported for this entry rather
	    // than when the next entry is started or the jar is closed.
	    os.closeEntry();
	}

	/**
	 * Resolves the jars that contain the representative classes listed in
	 * {@code DefaultPigPackages}.
	 * <p>
	 * The Guava entry is left out when {@code HadoopShims.isHadoopYARN()} is true
	 * (NOTE(review): presumably because the cluster already provides it - confirm).
	 * Classes for which no jar can be found are logged and skipped, so the result never
	 * contains {@code null}.
	 *
	 * @return the distinct jar paths, in enum declaration order
	 */
	public static List<String> getDefaultJars() {
	    List<String> defaultJars = new ArrayList<String>();
	    for (DefaultPigPackages pkgToSend : DefaultPigPackages.values()) {
	        if (pkgToSend == DefaultPigPackages.GUAVA && HadoopShims.isHadoopYARN()) {
	            continue; //Skip
	        }
	        String jar = findContainingJar(pkgToSend.getPkgClass());
	        if (jar == null) {
	            // findContainingJar returns null when the class is not served from a
	            // jar; a null path would only fail later in the caller.
	            log.warn("Could not find jar for class " + pkgToSend.getPkgClass());
	            continue;
	        }
	        if (!defaultJars.contains(jar)) {
	            defaultJars.add(jar);
	        }
	    }
	    return defaultJars;
	}

	/**
	* Find a jar that contains a class of the same name, if any. It will return a jar file, even if
	* that is not the first thing on the class path that has a class with the same name.
	*
	* @param my_class
	* the class to find
	* @return a jar file that contains the class, or null
	* @throws IOException
	*/
	public static String findContainingJar(Class my_class) {
	ClassLoader loader = PigContext.getClassLoader();
	String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
	try {
	Enumeration<URL> itr = null;
	//Try to find the class in registered jars
	if (loader instanceof URLClassLoader) {
	itr = ((URLClassLoader) loader).findResources(class_file);
	}
	//Try system classloader if not URLClassLoader or no resources found in URLClassLoader
	if (itr == null \|\| !itr.hasMoreElements()) {
	itr = loader.getResources(class_file);
	}
	for (; itr.hasMoreElements();) {
	URL url = (URL) itr.nextElement();
	if ("jar".equals(url.getProtocol())) {
	String toReturn = url.getPath();
	if (toReturn.startsWith("file:")) {
	toReturn = toReturn.substring("file:".length());
	}
	// URLDecoder is a misnamed class, since it actually decodes
	// x-www-form-urlencoded MIME type rather than actual
	// URL encoding (which the file path has). Therefore it would
	// decode +s to ' 's which is incorrect (spaces are actually
	// either unencoded or encoded as "%20"). Replace +s first, so
	// that they are kept sacred during the decoding process.
	toReturn = toReturn.replaceAll("\\+", "%2B");
	toReturn = URLDecoder.decode(toReturn, "UTF-8");
	return toReturn.replaceAll("!.*$", "");
	}
	}
	} catch (IOException e) {
	throw new RuntimeException(e);
	}
	return null;
	}

	/**
	 * Merges the jars that contain the given classes into the {@code tmpjars} entry of
	 * the job's configuration.
	 * <p>
	 * Values already present under {@code tmpjars} are kept. The property is left
	 * untouched when the merged set is empty.
	 *
	 * @param job
	 *            the job whose configuration is updated
	 * @param classes
	 *            the classes whose jars should be added
	 * @throws IOException
	 *             if the local file system cannot be obtained
	 */
	public static void addDependencyJars(Job job, Class<?>... classes)
	        throws IOException {
	    Configuration jobConf = job.getConfiguration();
	    FileSystem localFs = FileSystem.getLocal(jobConf);
	    Set<String> tmpJars = new HashSet<String>();
	    tmpJars.addAll(jobConf.getStringCollection("tmpjars"));
	    addQualifiedJarsName(localFs, tmpJars, classes);
	    if (!tmpJars.isEmpty()) {
	        String[] jarArray = tmpJars.toArray(new String[0]);
	        jobConf.set("tmpjars", StringUtils.arrayToString(jarArray));
	    }
	}

	/**
	 * Resolves the jar of each given class and adds its path, qualified against the file
	 * system's URI and working directory, to {@code jars}.
	 * <p>
	 * A class for which no jar is found is reported with a warning and skipped.
	 *
	 * @param fs
	 *            the file system providing the URI and working directory
	 * @param jars
	 *            the set that receives the qualified jar paths
	 * @param classes
	 *            the classes to look up
	 */
	private static void addQualifiedJarsName(FileSystem fs, Set<String> jars, Class<?>... classes) {
	    final URI baseUri = fs.getUri();
	    final Path cwd = fs.getWorkingDirectory();
	    for (int i = 0; i < classes.length; i++) {
	        Class<?> candidate = classes[i];
	        String containingJar = findContainingJar(candidate);
	        if (containingJar != null) {
	            Path qualified = new Path(containingJar).makeQualified(baseUri, cwd);
	            jars.add(qualified.toString());
	        } else {
	            log.warn("Could not find jar for class " + candidate);
	        }
	    }
	}

	}