| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.pig.impl.util; |
| |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.FileOutputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.OutputStream; |
| import java.net.MalformedURLException; |
| import java.net.URI; |
| import java.net.URL; |
| import java.net.URLClassLoader; |
| import java.net.URLDecoder; |
| import java.util.ArrayList; |
| import java.util.Enumeration; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.jar.JarEntry; |
| import java.util.jar.JarOutputStream; |
| |
| import org.antlr.runtime.CommonTokenStream; |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.mapreduce.Job; |
| import org.apache.hadoop.util.StringUtils; |
| import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce; |
| import org.apache.pig.impl.PigContext; |
| import org.apache.pig.org.roaringbitmap.RoaringBitmap; |
| import org.apache.tools.bzip2r.BZip2Constants; |
| import org.joda.time.DateTime; |
| |
| import com.google.common.collect.Multimaps; |
| |
| import dk.brics.automaton.Automaton; |
| |
| public class JarManager { |
| |
| private static Log log = LogFactory.getLog(JarManager.class); |
| private static final String PIGTEST_JAR = "pigtest.jar"; |
| |
| private static enum DefaultPigPackages { |
| |
| PIG(PigMapReduce.class), |
| BZIP2R(BZip2Constants.class), |
| AUTOMATON(Automaton.class), |
| ANTLR(CommonTokenStream.class), |
| JODATIME(DateTime.class), |
| SHADED_ROARING_BITMAP(RoaringBitmap.class); |
| |
| private final Class pkgClass; |
| |
| DefaultPigPackages(Class pkgClass) { |
| this.pkgClass = pkgClass; |
| } |
| |
| public Class getPkgClass() { |
| return pkgClass; |
| } |
| } |
| |
| public static File createPigScriptUDFJar(PigContext pigContext) throws IOException { |
| File scriptUDFJarFile = File.createTempFile("PigScriptUDF", ".jar"); |
| // ensure the scriptUDFJarFile is deleted on exit |
| scriptUDFJarFile.deleteOnExit(); |
| FileOutputStream fos = new FileOutputStream(scriptUDFJarFile); |
| HashMap<String, String> contents = new HashMap<String, String>(); |
| createPigScriptUDFJar(fos, pigContext, contents); |
| |
| if (!contents.isEmpty()) { |
| FileInputStream fis = null; |
| String md5 = null; |
| try { |
| fis = new FileInputStream(scriptUDFJarFile); |
| md5 = org.apache.commons.codec.digest.DigestUtils.md5Hex(fis); |
| } finally { |
| if (fis != null) { |
| fis.close(); |
| } |
| } |
| File newScriptUDFJarFile = new File(scriptUDFJarFile.getParent(), "PigScriptUDF-" + md5 + ".jar"); |
| scriptUDFJarFile.renameTo(newScriptUDFJarFile); |
| return newScriptUDFJarFile; |
| } |
| return null; |
| } |
| |
| private static void createPigScriptUDFJar(OutputStream os, PigContext pigContext, HashMap<String, String> contents) throws IOException { |
| JarOutputStream jarOutputStream = new JarOutputStream(os); |
| for (String path: pigContext.scriptFiles) { |
| log.debug("Adding entry " + path + " to job jar" ); |
| InputStream stream = null; |
| File inputFile = new File(path); |
| if (inputFile.exists()) { |
| stream = new FileInputStream(inputFile); |
| } else { |
| stream = PigContext.getClassLoader().getResourceAsStream(path); |
| } |
| if (stream==null) { |
| throw new IOException("Cannot find " + path); |
| } |
| try { |
| addStream(jarOutputStream, path, stream, contents, inputFile.lastModified()); |
| } finally { |
| stream.close(); |
| } |
| } |
| for (Map.Entry<String, File> entry : pigContext.getScriptFiles().entrySet()) { |
| log.debug("Adding entry " + entry.getKey() + " to job jar" ); |
| InputStream stream = null; |
| if (entry.getValue().exists()) { |
| stream = new FileInputStream(entry.getValue()); |
| } else { |
| stream = PigContext.getClassLoader().getResourceAsStream(entry.getValue().getPath()); |
| } |
| if (stream==null) { |
| throw new IOException("Cannot find " + entry.getValue().getPath()); |
| } |
| try { |
| addStream(jarOutputStream, entry.getKey(), stream, contents, entry.getValue().lastModified()); |
| } finally { |
| stream.close(); |
| } |
| } |
| if (!contents.isEmpty()) { |
| jarOutputStream.close(); |
| } else { |
| os.close(); |
| } |
| } |
| |
| /** |
| * Creates a Classloader based on the passed jarFile and any extra jar files. |
| * |
| * @param jarFile |
| * the jar file to be part of the newly created Classloader. This jar file plus any |
| * jars in the extraJars list will constitute the classpath. |
| * @return the new Classloader. |
| * @throws MalformedURLException |
| */ |
| static ClassLoader createCl(String jarFile, PigContext pigContext) throws MalformedURLException { |
| int len = pigContext.extraJars.size(); |
| int passedJar = jarFile == null ? 0 : 1; |
| URL urls[] = new URL[len + passedJar]; |
| if (jarFile != null) { |
| urls[0] = new URL("file:" + jarFile); |
| } |
| for (int i = 0; i < pigContext.extraJars.size(); i++) { |
| urls[i + passedJar] = new URL("file:" + pigContext.extraJars.get(i)); |
| } |
| return new URLClassLoader(urls, PigMapReduce.class.getClassLoader()); |
| } |
| |
| /** |
| * Adds a stream to a Jar file. |
| * |
| * @param os |
| * the OutputStream of the Jar file to which the stream will be added. |
| * @param name |
| * the name of the stream. |
| * @param is |
| * the stream to add. |
| * @param contents |
| * the current contents of the Jar file. (We use this to avoid adding two streams |
| * with the same name. |
| * @param timestamp |
| * timestamp of the entry |
| * @throws IOException |
| */ |
| private static void addStream(JarOutputStream os, String name, InputStream is, Map<String, String> contents, |
| long timestamp) |
| throws IOException { |
| if (contents.get(name) != null) { |
| return; |
| } |
| contents.put(name, ""); |
| JarEntry entry = new JarEntry(name); |
| entry.setTime(timestamp); |
| os.putNextEntry(entry); |
| byte buffer[] = new byte[4096]; |
| int rc; |
| while ((rc = is.read(buffer)) > 0) { |
| os.write(buffer, 0, rc); |
| } |
| } |
| |
| public static List<String> getDefaultJars() { |
| List<String> defaultJars = new ArrayList<String>(); |
| for (DefaultPigPackages pkgToSend : DefaultPigPackages.values()) { |
| String jar = findContainingJar(pkgToSend.getPkgClass()); |
| if (jar != null && !defaultJars.contains(jar)) { |
| defaultJars.add(jar); |
| } |
| } |
| return defaultJars; |
| } |
| |
| /** |
| * Find a jar that contains a class of the same name, if any. It will return a jar file, even if |
| * that is not the first thing on the class path that has a class with the same name. |
| * |
| * @param my_class |
| * the class to find |
| * @return a jar file that contains the class, or null |
| * @throws IOException |
| */ |
| public static String findContainingJar(Class my_class) { |
| ClassLoader loader = PigContext.getClassLoader(); |
| String class_file = my_class.getName().replaceAll("\\.", "/") + ".class"; |
| try { |
| Enumeration<URL> itr = null; |
| //Try to find the class in registered jars |
| if (loader instanceof URLClassLoader) { |
| itr = ((URLClassLoader) loader).findResources(class_file); |
| } |
| //Try system classloader if not URLClassLoader or no resources found in URLClassLoader |
| if (itr == null || !itr.hasMoreElements()) { |
| itr = loader.getResources(class_file); |
| } |
| for (; itr.hasMoreElements();) { |
| URL url = (URL) itr.nextElement(); |
| if ("jar".equals(url.getProtocol())) { |
| String toReturn = url.getPath(); |
| if (toReturn.startsWith("file:")) { |
| toReturn = toReturn.substring("file:".length()); |
| } |
| // URLDecoder is a misnamed class, since it actually decodes |
| // x-www-form-urlencoded MIME type rather than actual |
| // URL encoding (which the file path has). Therefore it would |
| // decode +s to ' 's which is incorrect (spaces are actually |
| // either unencoded or encoded as "%20"). Replace +s first, so |
| // that they are kept sacred during the decoding process. |
| toReturn = toReturn.replaceAll("\\+", "%2B"); |
| toReturn = URLDecoder.decode(toReturn, "UTF-8"); |
| return toReturn.replaceAll("!.*$", ""); |
| } |
| } |
| } catch (IOException e) { |
| throw new RuntimeException(e); |
| } |
| return null; |
| } |
| |
| /** |
| * Add the jars containing the given classes to the job's configuration |
| * such that JobClient will ship them to the cluster and add them to |
| * the DistributedCache |
| * |
| * @param job |
| * Job object |
| * @param classes |
| * classes to find |
| * @throws IOException |
| */ |
| public static void addDependencyJars(Job job, Class<?>... classes) |
| throws IOException { |
| Configuration conf = job.getConfiguration(); |
| FileSystem fs = FileSystem.getLocal(conf); |
| Set<String> jars = new HashSet<String>(); |
| jars.addAll(conf.getStringCollection("tmpjars")); |
| addQualifiedJarsName(fs, jars, classes); |
| if (jars.isEmpty()) |
| return; |
| conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0]))); |
| } |
| |
| /** |
| * Add the qualified path name of jars containing the given classes |
| * |
| * @param fs |
| * FileSystem object |
| * @param jars |
| * the resolved path names to be added to this set |
| * @param classes |
| * classes to find |
| */ |
| private static void addQualifiedJarsName(FileSystem fs, Set<String> jars, Class<?>... classes) { |
| URI fsUri = fs.getUri(); |
| Path workingDir = fs.getWorkingDirectory(); |
| for (Class<?> clazz : classes) { |
| String jarName = findContainingJar(clazz); |
| if (jarName == null) { |
| log.warn("Could not find jar for class " + clazz); |
| continue; |
| } |
| jars.add(new Path(jarName).makeQualified(fsUri, workingDir).toString()); |
| } |
| } |
| |
| /** |
| * Adds jar file where pig test classes are packed (build/test/classes) |
| * @param jars Set of jars to append pig tests jar into |
| */ |
| public static void addPigTestJarIfPresent(Set<String> jars) { |
| File file = new File(PIGTEST_JAR); |
| if (file.exists()) { |
| jars.add(file.getAbsolutePath()); |
| } |
| } |
| |
| } |