/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.sqoop.orm;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.jar.JarOutputStream;
import java.util.zip.ZipEntry;
import javax.tools.JavaCompiler;
import javax.tools.JavaFileObject;
import javax.tools.StandardJavaFileManager;
import javax.tools.ToolProvider;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.sqoop.SqoopOptions;
import org.apache.hadoop.sqoop.util.FileListing;
/**
* Manages the compilation of a bunch of .java files into .class files
* and eventually a jar.
*
* Also embeds this program's jar into the lib/ directory inside the compiled jar
* to ensure that the job runs correctly.
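*
* <p>A typical usage sketch (the SqoopOptions instance, here called
* "options", is assumed to be configured by the caller):</p>
* <pre>
*   CompilationManager mgr = new CompilationManager(options);
*   mgr.addSourceFile("MyTable.java");
*   mgr.compile();
*   mgr.jar();
* </pre>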
*/
public class CompilationManager {
public static final Log LOG = LogFactory.getLog(CompilationManager.class.getName());
private SqoopOptions options;
private List<String> sources;
public CompilationManager(final SqoopOptions opts) {
options = opts;
sources = new ArrayList<String>();
}
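/**
* Add a generated source file (path relative to the code output directory)
* to the list of files to compile.
*/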
public void addSourceFile(String sourceName) {
sources.add(sourceName);
}
/**
* Locate the hadoop-*-core.jar in $HADOOP_HOME (or the dir set by --hadoop-home).
* If that doesn't work, check our classpath.
* @return the path to the hadoop-*-core.jar file, or null if it cannot be found.
*/
private String findHadoopCoreJar() {
String hadoopHome = options.getHadoopHome();
if (null == hadoopHome) {
LOG.info("$HADOOP_HOME is not set");
return findJarForClass(JobConf.class);
}
if (!hadoopHome.endsWith(File.separator)) {
hadoopHome = hadoopHome + File.separator;
}
File hadoopHomeFile = new File(hadoopHome);
LOG.info("HADOOP_HOME is " + hadoopHomeFile.getAbsolutePath());
File [] entries = hadoopHomeFile.listFiles();
if (null == entries) {
LOG.warn("HADOOP_HOME appears empty or missing");
return findJarForClass(JobConf.class);
}
for (File f : entries) {
if (f.getName().startsWith("hadoop-") && f.getName().endsWith("-core.jar")) {
LOG.info("Found hadoop core jar at: " + f.getAbsolutePath());
return f.getAbsolutePath();
}
}
return findJarForClass(JobConf.class);
}
/**
* Compile the .java files into .class files via embedded javac call.
*/
public void compile() throws IOException {
List<String> args = new ArrayList<String>();
// ensure that the jar output dir exists.
String jarOutDir = options.getJarOutputDir();
File jarOutDirObj = new File(jarOutDir);
if (!jarOutDirObj.exists()) {
boolean mkdirSuccess = jarOutDirObj.mkdirs();
if (!mkdirSuccess) {
LOG.debug("Warning: Could not make directories for " + jarOutDir);
}
} else if (LOG.isDebugEnabled()) {
LOG.debug("Found existing " + jarOutDir);
}
// find hadoop-*-core.jar for classpath.
String coreJar = findHadoopCoreJar();
if (null == coreJar) {
// Couldn't find a core jar to insert into the CP for compilation.
// If, however, we're running this from a unit test, then the path
// to the .class files might be set via the hadoop.alt.classpath property
// instead. Check there first.
String coreClassesPath = System.getProperty("hadoop.alt.classpath");
if (null == coreClassesPath) {
// no -- we're out of options. Fail.
throw new IOException("Could not find hadoop core jar!");
} else {
coreJar = coreClassesPath;
}
}
// find sqoop jar for compilation classpath
String sqoopJar = findThisJar();
if (null != sqoopJar) {
sqoopJar = File.pathSeparator + sqoopJar;
} else {
LOG.warn("Could not find sqoop jar; child compilation may fail");
sqoopJar = "";
}
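// Assemble the javac arguments: read sources from the generated-code dir,
// emit .class files into the jar output dir, and compile against the current
// JVM classpath plus the hadoop core jar and the sqoop jar.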
String curClasspath = System.getProperty("java.class.path");
String srcOutDir = new File(options.getCodeOutputDir()).getAbsolutePath();
if (!srcOutDir.endsWith(File.separator)) {
srcOutDir = srcOutDir + File.separator;
}
args.add("-sourcepath");
args.add(srcOutDir);
args.add("-d");
args.add(jarOutDir);
args.add("-classpath");
args.add(curClasspath + File.pathSeparator + coreJar + sqoopJar);
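// Use the JSR-199 compiler API shipped with the JDK; getSystemJavaCompiler()
// returns null when running on a JRE that does not bundle javac.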
JavaCompiler compiler = ToolProvider.getSystemJavaCompiler();
if (null == compiler) {
throw new IOException("Could not find a system Java compiler; run Sqoop with a JDK, not a JRE.");
}
StandardJavaFileManager fileManager =
compiler.getStandardFileManager(null, null, null);
ArrayList<String> srcFileNames = new ArrayList<String>();
for (String srcfile : sources) {
srcFileNames.add(srcOutDir + srcfile);
LOG.debug("Adding source file: " + srcOutDir + srcfile);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Invoking javac with args:");
for (String arg : args) {
LOG.debug(" " + arg);
}
}
Iterable<? extends JavaFileObject> srcFileObjs =
fileManager.getJavaFileObjectsFromStrings(srcFileNames);
JavaCompiler.CompilationTask task = compiler.getTask(
null, // Write to stderr
fileManager,
null, // No special diagnostic handling
args,
null, // Compile all classes in the source compilation units
srcFileObjs);
boolean result = task.call();
if (!result) {
throw new IOException("Error returned by javac");
}
}
/**
* @return the complete filename of the .jar file to generate.
*/
public String getJarFilename() {
String jarOutDir = options.getJarOutputDir();
String tableName = options.getTableName();
if (null != tableName && tableName.length() > 0) {
return jarOutDir + tableName + ".jar";
} else if (this.sources.size() == 1) {
// If we only have one source file, find its base name,
// turn "foo.java" into "foo", and then return jarDir + "foo" + ".jar".
String srcFileName = this.sources.get(0);
String basename = new File(srcFileName).getName();
String [] parts = basename.split("\\.");
String preExtPart = parts[0];
return jarOutDir + preExtPart + ".jar";
} else {
return jarOutDir + "sqoop.jar";
}
}
/**
* Searches through a directory and its children for .class
* files to add to a jar.
*
* @param dir - The root directory to scan with this algorithm.
* @param jstream - The JarOutputStream to write .class files to.
*/
private void addClassFilesFromDir(File dir, JarOutputStream jstream)
throws IOException {
LOG.debug("Scanning for .class files in directory: " + dir);
List<File> dirEntries = FileListing.getFileListing(dir);
String baseDirName = dir.getAbsolutePath();
if (!baseDirName.endsWith(File.separator)) {
baseDirName = baseDirName + File.separator;
}
// for each input class file, create a zipfile entry for it,
// read the file into a buffer, and write it to the jar file.
for (File entry : dirEntries) {
if (!entry.isDirectory()) {
// chomp off the portion of the full path that is shared
// with the base directory where class files were put;
// we only record the subdir parts in the zip entry.
String fullPath = entry.getAbsolutePath();
String chompedPath = fullPath.substring(baseDirName.length());
boolean include = chompedPath.endsWith(".class")
&& sources.contains(
chompedPath.substring(0, chompedPath.length() - ".class".length()) + ".java");
if (include) {
// include this file.
LOG.debug("Got classfile: " + entry.getPath() + " -> " + chompedPath);
ZipEntry ze = new ZipEntry(chompedPath);
jstream.putNextEntry(ze);
copyFileToStream(entry, jstream);
jstream.closeEntry();
}
}
}
}
/**
* Create an output jar file to use when executing MapReduce jobs.
*/
public void jar() throws IOException {
String jarOutDir = options.getJarOutputDir();
String jarFilename = getJarFilename();
LOG.info("Writing jar file: " + jarFilename);
File jarFileObj = new File(jarFilename);
if (jarFileObj.exists()) {
LOG.debug("Found existing jar (" + jarFilename + "); removing.");
if (!jarFileObj.delete()) {
LOG.warn("Could not remove existing jar file: " + jarFilename);
}
}
FileOutputStream fstream = null;
JarOutputStream jstream = null;
try {
fstream = new FileOutputStream(jarFilename);
jstream = new JarOutputStream(fstream);
addClassFilesFromDir(new File(jarOutDir), jstream);
// put our own jar in there in its lib/ subdir
String thisJarFile = findThisJar();
if (null != thisJarFile) {
File thisJarFileObj = new File(thisJarFile);
String thisJarBasename = thisJarFileObj.getName();
String thisJarEntryName = "lib" + File.separator + thisJarBasename;
ZipEntry ze = new ZipEntry(thisJarEntryName);
jstream.putNextEntry(ze);
copyFileToStream(thisJarFileObj, jstream);
jstream.closeEntry();
} else {
// couldn't find our own jar (we were running from .class files?)
LOG.warn("Could not find jar for Sqoop; MapReduce jobs may not run correctly.");
}
jstream.finish();
} finally {
if (null != jstream) {
try {
jstream.close();
} catch (IOException ioe) {
LOG.warn("IOException closing jar stream: " + ioe.toString());
}
}
if (null != fstream) {
try {
fstream.close();
} catch (IOException ioe) {
LOG.warn("IOException closing file stream: " + ioe.toString());
}
}
}
LOG.debug("Finished writing jar file " + jarFilename);
}
private static final int BUFFER_SZ = 4096;
/**
* Utility method to copy a file (e.g., a .class file) into an output stream.
* @param f the file to copy from.
* @param ostream the stream to copy into.
* @throws IOException if the file cannot be read or the stream cannot be written.
*/
private void copyFileToStream(File f, OutputStream ostream) throws IOException {
FileInputStream fis = new FileInputStream(f);
byte [] buffer = new byte[BUFFER_SZ];
try {
while (true) {
int bytesReceived = fis.read(buffer);
if (bytesReceived < 1) {
break;
}
ostream.write(buffer, 0, bytesReceived);
}
} finally {
fis.close();
}
}
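/**
* @return the path of the jar file containing this class, or null if this
* class is not being run from a jar (e.g., from unpacked .class files).
*/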
private String findThisJar() {
return findJarForClass(CompilationManager.class);
}
// method mostly cloned from o.a.h.mapred.JobConf.findContainingJar()
private String findJarForClass(Class<?> classObj) {
ClassLoader loader = classObj.getClassLoader();
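// Turn the class name into a classpath resource name, e.g. "a.b.C" -> "a/b/C.class".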
String classFile = classObj.getName().replaceAll("\\.", "/") + ".class";
try {
for (Enumeration<URL> itr = loader.getResources(classFile);
itr.hasMoreElements();) {
URL url = itr.nextElement();
if ("jar".equals(url.getProtocol())) {
String toReturn = url.getPath();
if (toReturn.startsWith("file:")) {
toReturn = toReturn.substring("file:".length());
}
// URLDecoder is a misnamed class, since it actually decodes
// x-www-form-urlencoded MIME type rather than actual
// URL encoding (which the file path has). Therefore it would
// decode +s to ' 's which is incorrect (spaces are actually
// either unencoded or encoded as "%20"). Replace +s first, so
// that they are kept sacred during the decoding process.
toReturn = toReturn.replaceAll("\\+", "%2B");
toReturn = URLDecoder.decode(toReturn, "UTF-8");
return toReturn.replaceAll("!.*$", "");
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
return null;
}
}