// blob: 344a03471cdbdc9657bf97039f50f6579aa5c1cb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.samza.classloader;
import com.linkedin.cytodynamics.matcher.BootstrapClassPredicate;
import com.linkedin.cytodynamics.matcher.GlobMatcher;
import com.linkedin.cytodynamics.nucleus.DelegateRelationship;
import com.linkedin.cytodynamics.nucleus.DelegateRelationshipBuilder;
import com.linkedin.cytodynamics.nucleus.IsolationLevel;
import com.linkedin.cytodynamics.nucleus.LoaderBuilder;
import com.linkedin.cytodynamics.nucleus.OriginRestriction;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang3.StringUtils;
import org.apache.samza.SamzaException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Use this to build a classloader for running Samza which isolates the Samza framework code/dependencies from the
* application code/dependencies.
*/
public class IsolatingClassLoaderFactory {
  private static final Logger LOG = LoggerFactory.getLogger(IsolatingClassLoaderFactory.class);

  /** Name of the directory (within each package directory) that is scanned for classpath JARs/WARs. */
  private static final String LIB_DIRECTORY = "lib";

  /**
   * Build a classloader which will isolate Samza framework code from application code. Samza framework classes and
   * application-specific classes will be loaded using different classloaders. This will enable dependencies of each
   * category of classes to also be loaded separately, so that runtime dependency conflicts do not happen.
   * Each call to this method will build a different instance of a classloader.
   *
   * Samza framework API classes need to be specified in a file called
   * {@link DependencyIsolationUtils#FRAMEWORK_API_CLASS_LIST_FILE_NAME} which is in the lib directory which is in the
   * API package. The file needs to be generated when building the framework API package. This class will not generate
   * the file.
   *
   * Implementation notes:
   *
   * The cytodynamics isolating classloader is used for this. It provides more control than the built-in
   * {@link URLClassLoader}. Cytodynamics provides the ability to compose multiple classloaders together and have more
   * granular delegation strategies between the classloaders.
   *
   * In order to share objects between classes loaded by different classloaders, the classes for the shared objects must
   * be loaded by a common classloader. Those common classes will be loaded through a common API classloader. The
   * cytodynamics classloader can be set up to only use the common API classloader for an explicit set of classes. The
   * {@link DependencyIsolationUtils#FRAMEWORK_API_CLASS_LIST_FILE_NAME} file should include the framework API classes.
   * Also, bootstrap classes (e.g. java.lang.String) need to be loaded by a common classloader, since objects of those
   * types need to be shared across the framework and the application. There are also some static bootstrap classes
   * which should be shared (e.g. java.lang.System). Bootstrap classes will be loaded through a common classloader by
   * default.
   *
   * These are the classloaders which are used to make up the final classloader.
   * <ul>
   *   <li>bootstrap classloader: Built-in Java classes (e.g. java.lang.String)</li>
   *   <li>API classloader: Common Samza framework API classes</li>
   *   <li>infrastructure classloader: Core Samza framework classes and plugins that are included in the framework</li>
   *   <li>
   *     application classloader: Application code and plugins that are needed in the app but are not included in the
   *     framework
   *   </li>
   * </ul>
   *
   * This is the delegation structure for the classloaders:
   * <pre>
   *   (bootstrap               (API                  (application
   *   classloader) &lt;---- classloader) &lt;------- classloader)
   *                             ^                      ^
   *                             |                     /
   *                             |                    /
   *                             |                   /
   *                             |                  /
   *                         (infrastructure classloader)
   * </pre>
   * The cytodynamics classloader allows control over when the delegation should happen.
   * <ol>
   *   <li>API classloader delegates to the bootstrap classloader if the bootstrap classloader has the class.</li>
   *   <li>
   *     Infrastructure classloader only delegates to the API classloader for the common classes specified by
   *     {@link DependencyIsolationUtils#FRAMEWORK_API_CLASS_LIST_FILE_NAME}.
   *   </li>
   *   <li>
   *     Infrastructure classloader delegates to the application classloader when a class can't be found in the
   *     infrastructure classloader.
   *   </li>
   *   <li>
   *     Application classloader only delegates to the API classloader for the common classes specified by
   *     {@link DependencyIsolationUtils#FRAMEWORK_API_CLASS_LIST_FILE_NAME}.
   *   </li>
   * </ol>
   *
   * @return a new infrastructure {@link ClassLoader} wired to the API and application classloaders
   * @throws SamzaException if a required lib directory or the API class list file cannot be accessed or read
   */
  public ClassLoader buildClassLoader() {
    // start at the user.dir to find the resources for the classpaths
    File baseJobDirectory = new File(System.getProperty("user.dir"));

    File apiLibDirectory = libDirectory(new File(baseJobDirectory, DependencyIsolationUtils.FRAMEWORK_API_DIRECTORY));
    LOG.info("Using API lib directory: {}", apiLibDirectory);

    File infrastructureLibDirectory =
        libDirectory(new File(baseJobDirectory, DependencyIsolationUtils.FRAMEWORK_INFRASTRUCTURE_DIRECTORY));
    LOG.info("Using infrastructure lib directory: {}", infrastructureLibDirectory);

    File applicationLibDirectory =
        libDirectory(new File(baseJobDirectory, DependencyIsolationUtils.APPLICATION_DIRECTORY));
    LOG.info("Using application lib directory: {}", applicationLibDirectory);

    ClassLoader apiClassLoader = buildApiClassLoader(apiLibDirectory);
    ClassLoader applicationClassLoader =
        buildApplicationClassLoader(applicationLibDirectory, apiLibDirectory, apiClassLoader);

    // the classloader to return is the one with the infrastructure classpath
    return buildInfrastructureClassLoader(infrastructureLibDirectory, baseJobDirectory, apiLibDirectory, apiClassLoader,
        applicationClassLoader);
  }

  /**
   * Build the {@link ClassLoader} which can load framework API classes.
   *
   * This sets up the link between the bootstrap classloader and the API classloader (see {@link #buildClassLoader()}).
   *
   * @param apiLibDirectory directory containing the JARs/WARs of the framework API
   * @return classloader over the API classpath whose parent is the bootstrap classloader
   */
  private static ClassLoader buildApiClassLoader(File apiLibDirectory) {
    /*
     * This can just use the built-in classloading, which checks the parent classloader first and then checks its own
     * classpath. A null parent means bootstrap classloader, which contains core Java classes (e.g. java.lang.String).
     * This doesn't need to be isolated from the parent, because we only want to load all bootstrap classes from the
     * bootstrap classloader.
     */
    return new URLClassLoader(getClasspathAsURLs(apiLibDirectory), null);
  }

  /**
   * Build the {@link ClassLoader} which can load application classes.
   *
   * This sets up the link between the application classloader and the API classloader (see
   * {@link #buildClassLoader()}).
   *
   * @param applicationLibDirectory directory containing the JARs/WARs of the application
   * @param apiLibDirectory directory containing the framework API class list file
   * @param apiClassLoader classloader to delegate to for framework API classes
   * @return isolating classloader over the application classpath
   */
  private static ClassLoader buildApplicationClassLoader(File applicationLibDirectory, File apiLibDirectory,
      ClassLoader apiClassLoader) {
    return LoaderBuilder.anIsolatingLoader()
        // look in application lib directory for JARs
        .withClasspath(getClasspathAsURIs(applicationLibDirectory))
        // getClasspathAsURIs should only return JARs within applicationLibDirectory anyways, but doing it to be safe
        .withOriginRestriction(OriginRestriction.denyByDefault().allowingDirectory(applicationLibDirectory, false))
        // delegate to the api classloader for API classes
        .withParentRelationship(buildApiParentRelationship(apiLibDirectory, apiClassLoader))
        .build();
  }

  /**
   * Build the {@link ClassLoader} which can load Samza framework core classes. If a file with the name
   * {@link DependencyIsolationUtils#RUNTIME_FRAMEWORK_RESOURCES_PATHING_JAR_NAME} is found in {@code baseJobDirectory},
   * then it will be included in the classpath.
   * This may also fall back to loading application classes.
   *
   * This sets up two links: One link between the infrastructure classloader and the API and another link between the
   * infrastructure classloader and the application classloader (see {@link #buildClassLoader()}).
   *
   * @param infrastructureLibDirectory directory containing the JARs/WARs of the framework infrastructure
   * @param baseJobDirectory directory in which the optional runtime framework resources pathing JAR is looked up
   * @param apiLibDirectory directory containing the framework API class list file
   * @param apiClassLoader classloader to delegate to for framework API classes
   * @param applicationClassLoader classloader to fall back to when a class is not on the infrastructure classpath
   * @return isolating classloader over the infrastructure classpath
   */
  private static ClassLoader buildInfrastructureClassLoader(File infrastructureLibDirectory,
      File baseJobDirectory,
      File apiLibDirectory,
      ClassLoader apiClassLoader,
      ClassLoader applicationClassLoader) {
    // start with JARs in infrastructure lib directory
    List<URI> classpathURIs = new ArrayList<>(getClasspathAsURIs(infrastructureLibDirectory));
    OriginRestriction originRestriction = OriginRestriction.denyByDefault()
        // getClasspathAsURIs should only return JARs within infrastructureLibDirectory anyways, but doing it to be safe
        .allowingDirectory(infrastructureLibDirectory, false);

    File runtimeFrameworkResourcesPathingJar =
        new File(baseJobDirectory, DependencyIsolationUtils.RUNTIME_FRAMEWORK_RESOURCES_PATHING_JAR_NAME);
    if (canAccess(runtimeFrameworkResourcesPathingJar)) {
      // if there is a runtime framework resources pathing JAR, then include that in the classpath as well
      classpathURIs.add(runtimeFrameworkResourcesPathingJar.toURI());
      /*
       * NOTE(review): the return value is not used, so this relies on allowingGlobPattern modifying
       * originRestriction in place -- confirm against the cytodynamics OriginRestriction API.
       */
      originRestriction.allowingGlobPattern(fileURL(runtimeFrameworkResourcesPathingJar).toExternalForm());
      LOG.info("Added {} to infrastructure classpath", runtimeFrameworkResourcesPathingJar.getPath());
    } else {
      LOG.info("Unable to access {}, so not adding to infrastructure classpath",
          runtimeFrameworkResourcesPathingJar.getPath());
    }

    return LoaderBuilder.anIsolatingLoader()
        .withClasspath(Collections.unmodifiableList(classpathURIs))
        .withOriginRestriction(originRestriction)
        .withParentRelationship(buildApiParentRelationship(apiLibDirectory, apiClassLoader))
        /*
         * Fall back to the application classloader for certain classes. For example, the application might implement
         * some pluggable classes (e.g. SystemFactory). Another example is message schemas that are supplied by the
         * application.
         */
        .addFallbackDelegate(DelegateRelationshipBuilder.builder()
            .withDelegateClassLoader(applicationClassLoader)
            /*
             * NONE means that a class will be loaded from here if it is not found in the classpath of the loader that
             * uses this relationship.
             */
            .withIsolationLevel(IsolationLevel.NONE)
            .build())
        .build();
  }

  /**
   * Build a {@link DelegateRelationship} which defines how to delegate to the API classloader.
   *
   * Delegation will only happen for classes specified in
   * {@link DependencyIsolationUtils#FRAMEWORK_API_CLASS_LIST_FILE_NAME} and the Java bootstrap classes.
   *
   * @param apiLibDirectory directory containing the framework API class list file
   * @param apiClassLoader classloader to delegate to
   * @return relationship to use as the parent relationship of an isolating classloader
   */
  private static DelegateRelationship buildApiParentRelationship(File apiLibDirectory, ClassLoader apiClassLoader) {
    DelegateRelationshipBuilder apiParentRelationshipBuilder = DelegateRelationshipBuilder.builder()
        // needs to load API classes from the API classloader
        .withDelegateClassLoader(apiClassLoader)
        /*
         * FULL means to only load classes explicitly specified as "API" from the API classloader. We will use
         * delegate-preferred class predicates to specify which classes are "API" (see below).
         */
        .withIsolationLevel(IsolationLevel.FULL);

    // bootstrap classes need to be loaded from a common classloader
    apiParentRelationshipBuilder.addDelegatePreferredClassPredicate(new BootstrapClassPredicate());
    // the classes which are Samza framework API classes are added here
    for (String apiClassGlob : getFrameworkApiClassGlobs(apiLibDirectory)) {
      apiParentRelationshipBuilder.addDelegatePreferredClassPredicate(new GlobMatcher(apiClassGlob));
    }
    return apiParentRelationshipBuilder.build();
  }

  /**
   * Gets the globs for matching against classes to load from the framework API classloader. This will read the
   * {@link DependencyIsolationUtils#FRAMEWORK_API_CLASS_LIST_FILE_NAME} file in {@code directoryWithClassList} to get
   * the globs. The file is read as UTF-8, one glob per line. Blank lines are skipped, and whitespace surrounding a glob
   * is removed so that stray indentation or trailing spaces do not prevent the glob from matching.
   *
   * @param directoryWithClassList Directory in which
   * {@link DependencyIsolationUtils#FRAMEWORK_API_CLASS_LIST_FILE_NAME} lives
   * @return {@link List} of globs for matching against classes to load from the framework API classloader
   * @throws SamzaException if the file does not exist, is not readable, or cannot be read
   */
  @VisibleForTesting
  static List<String> getFrameworkApiClassGlobs(File directoryWithClassList) {
    File parentPreferredFile =
        new File(directoryWithClassList, DependencyIsolationUtils.FRAMEWORK_API_CLASS_LIST_FILE_NAME);
    validateCanAccess(parentPreferredFile);
    try {
      return Files.readAllLines(Paths.get(parentPreferredFile.toURI()), StandardCharsets.UTF_8)
          .stream()
          .filter(StringUtils::isNotBlank)
          // class names never contain surrounding whitespace, so it can only be an accident in the file
          .map(String::trim)
          .collect(Collectors.toList());
    } catch (IOException e) {
      throw new SamzaException("Error while reading samza-api class list", e);
    }
  }

  /**
   * Get the {@link URL}s of all JARs/WARs in the directory {@code jarsLocation}. This only looks one level down; it is
   * not recursive. The result is ordered by pathname so that the resulting classpath order is reproducible.
   *
   * @param jarsLocation directory to look in
   * @return URLs of the entries in {@code jarsLocation} whose names end with ".jar" or ".war"
   * @throws SamzaException if {@code jarsLocation} cannot be accessed or its contents cannot be listed
   */
  @VisibleForTesting
  static URL[] getClasspathAsURLs(File jarsLocation) {
    validateCanAccess(jarsLocation);
    File[] filesInJarsLocation = jarsLocation.listFiles();
    if (filesInJarsLocation == null) {
      throw new SamzaException(
          String.format("Could not find any files inside %s, probably because it is not a directory",
              jarsLocation.getPath()));
    }
    URL[] urls = Stream.of(filesInJarsLocation)
        .filter(file -> file.getName().endsWith(".jar") || file.getName().endsWith(".war"))
        // listFiles() gives no ordering guarantee; use File's natural (pathname) ordering for a stable classpath
        .sorted()
        .map(IsolatingClassLoaderFactory::fileURL)
        .toArray(URL[]::new);
    LOG.info("Found {} items to load into classpath from {}", urls.length, jarsLocation);
    for (URL url : urls) {
      LOG.debug("Found {} from {}", url, jarsLocation);
    }
    return urls;
  }

  /**
   * Get the {@link URI}s of all JARs/WARs in the directory {@code jarsLocation}. This only looks one level down; it is
   * not recursive. The entries are the same as, and in the same order as, those of
   * {@link #getClasspathAsURLs(File)}.
   *
   * @param jarsLocation directory to look in
   * @return URIs of the entries in {@code jarsLocation} whose names end with ".jar" or ".war"
   * @throws SamzaException if {@code jarsLocation} cannot be accessed or a URL cannot be converted to a URI
   */
  @VisibleForTesting
  static List<URI> getClasspathAsURIs(File jarsLocation) {
    return Stream.of(getClasspathAsURLs(jarsLocation))
        .map(IsolatingClassLoaderFactory::urlToURI)
        .collect(Collectors.toList());
  }

  /**
   * Checks whether a file exists and can be read.
   *
   * @param file file or directory to check
   * @return true if {@code file} exists and is readable; false otherwise
   */
  private static boolean canAccess(File file) {
    return file.exists() && file.canRead();
  }

  /**
   * Makes sure that a file exists and can be read.
   *
   * @param file file or directory to check
   * @throws SamzaException if {@code file} does not exist or is not readable
   */
  private static void validateCanAccess(File file) {
    if (!canAccess(file)) {
      throw new SamzaException("Unable to access file: " + file);
    }
  }

  /**
   * Get the {@link URL} for a {@link File}.
   * Converts checked exceptions into {@link SamzaException}s.
   *
   * @param file file to convert
   * @return URL built from the file's URI
   * @throws SamzaException if the URI of {@code file} cannot be converted to a URL
   */
  private static URL fileURL(File file) {
    URI uri = file.toURI();
    try {
      return uri.toURL();
    } catch (MalformedURLException e) {
      throw new SamzaException("Unable to get URL for file: " + file, e);
    }
  }

  /**
   * Get the {@link URI} for a {@link URL}.
   * Converts checked exceptions into {@link SamzaException}s.
   *
   * @param url URL to convert
   * @return URI equivalent of {@code url}
   * @throws SamzaException if {@code url} is not a syntactically valid URI
   */
  private static URI urlToURI(URL url) {
    try {
      return url.toURI();
    } catch (URISyntaxException e) {
      throw new SamzaException("Unable to get URI for URL: " + url, e);
    }
  }

  /**
   * Get the {@link File} representing the {@link #LIB_DIRECTORY} inside the given {@code file}.
   *
   * @param file parent directory
   * @return the {@link #LIB_DIRECTORY} child of {@code file}; its existence is not checked here
   */
  private static File libDirectory(File file) {
    return new File(file, LIB_DIRECTORY);
  }
}