/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.client;
import java.util.List;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.Plan;
import org.apache.flink.api.common.PlanExecutor;
import org.apache.flink.api.common.Program;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.akka.AkkaUtils;
import org.apache.flink.runtime.instance.ActorGateway;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.dag.DataSinkNode;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator;
import org.apache.flink.optimizer.plantranslate.JobGraphGenerator;
import org.apache.flink.runtime.messages.JobManagerMessages;
import org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster;
/**
* A PlanExecutor that runs Flink programs on a local embedded Flink runtime instance.
*
* <p>By simply calling the {@link #executePlan(org.apache.flink.api.common.Plan)} method,
* this executor starts up and shuts down again immediately after the program has finished.</p>
*
* <p>To use this executor to execute many dataflow programs that together constitute one job,
* the executor needs to be started explicitly so that it keeps running across the individual executions.</p>
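*
* <p>A minimal usage sketch for session-style execution; {@code plan1} and {@code plan2} stand in
* for dataflow plans built elsewhere:</p>
*
* <pre>{@code
* LocalExecutor executor = new LocalExecutor();
* executor.start();                      // keep the embedded cluster alive across executions
* try {
*     executor.executePlan(plan1);
*     executor.executePlan(plan2);
* } finally {
*     executor.stop();
* }
* }</pre>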
*/
public class LocalExecutor extends PlanExecutor {
private static final boolean DEFAULT_OVERWRITE = false;
private static final int DEFAULT_TASK_MANAGER_NUM_SLOTS = -1;
/** Lock to ensure that only one execution or start/stop operation runs at a time */
private final Object lock = new Object();
/** The mini cluster on which to execute the local programs */
private LocalFlinkMiniCluster flink;
/** Custom user configuration for the execution */
private final Configuration configuration;
/** Config value for how many slots to provide in the local cluster */
private int taskManagerNumSlots = DEFAULT_TASK_MANAGER_NUM_SLOTS;
/** Config flag whether to overwrite existing files by default */
private boolean defaultOverwriteFiles = DEFAULT_OVERWRITE;
// ------------------------------------------------------------------------
public LocalExecutor() {
this(null);
}
public LocalExecutor(Configuration conf) {
this.configuration = conf != null ? conf : new Configuration();
}
// ------------------------------------------------------------------------
// Configuration
// ------------------------------------------------------------------------
public boolean isDefaultOverwriteFiles() {
return defaultOverwriteFiles;
}
public void setDefaultOverwriteFiles(boolean defaultOverwriteFiles) {
this.defaultOverwriteFiles = defaultOverwriteFiles;
}
public void setTaskManagerNumSlots(int taskManagerNumSlots) {
this.taskManagerNumSlots = taskManagerNumSlots;
}
public int getTaskManagerNumSlots() {
return this.taskManagerNumSlots;
}
// --------------------------------------------------------------------------------------------
@Override
public void start() throws Exception {
synchronized (lock) {
if (flink == null) {
// create the embedded runtime
Configuration configuration = createConfiguration();
if (this.configuration != null) {
configuration.addAll(this.configuration);
}
// start it up
flink = new LocalFlinkMiniCluster(configuration, true);
this.flink.start();
} else {
throw new IllegalStateException("The local executor was already started.");
}
}
}
@Override
public void stop() throws Exception {
synchronized (lock) {
if (flink != null) {
flink.stop();
flink = null;
}
}
}
@Override
public boolean isRunning() {
return flink != null;
}
/**
* Executes the given program on a local runtime and waits for the job to finish.
*
* <p>If the executor has not been started before, this method starts the executor and shuts it down
* after the job finishes. If the job runs in session mode, the executor is kept alive until
* no more references to the executor exist.</p>
*
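* <p>A minimal sketch of a one-off execution (assuming {@code plan} is a dataflow plan built
* elsewhere); the embedded cluster is started and stopped around this single call:</p>
*
* <pre>{@code
* JobExecutionResult result = new LocalExecutor().executePlan(plan);
* long netRuntimeMillis = result.getNetRuntime();
* }</pre>
*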
* @param plan The plan of the program to execute.
* @return The result of the program's execution, including the net runtime.
*
* @throws Exception Thrown if either the startup of the local execution context or the execution
* of the program caused an exception.
*/
@Override
public JobExecutionResult executePlan(Plan plan) throws Exception {
if (plan == null) {
throw new IllegalArgumentException("The plan may not be null.");
}
synchronized (this.lock) {
// check whether we start a session dedicated to this execution
final boolean shutDownAtEnd;
if (flink == null) {
shutDownAtEnd = true;
// configure the number of local slots equal to the parallelism of the local plan
if (this.taskManagerNumSlots == DEFAULT_TASK_MANAGER_NUM_SLOTS) {
int maxParallelism = plan.getMaximumParallelism();
if (maxParallelism > 0) {
this.taskManagerNumSlots = maxParallelism;
}
}
// start the cluster for us
start();
}
else {
// we use the existing session
shutDownAtEnd = false;
}
try {
Configuration configuration = this.flink.configuration();
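// optimize the plan and compile it into a JobGraph that the embedded cluster can execute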
Optimizer pc = new Optimizer(new DataStatistics(), configuration);
OptimizedPlan op = pc.compile(plan);
JobGraphGenerator jgg = new JobGraphGenerator(configuration);
JobGraph jobGraph = jgg.compileJobGraph(op, plan.getJobId());
boolean sysoutPrint = isPrintingStatusDuringExecution();
return flink.submitJobAndWait(jobGraph, sysoutPrint);
}
finally {
if (shutDownAtEnd) {
stop();
}
}
}
}
/**
* Creates a JSON representation of the given dataflow's execution plan.
*
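* <p>A small usage sketch (assuming {@code plan} is a dataflow plan built elsewhere):</p>
*
* <pre>{@code
* String json = new LocalExecutor().getOptimizerPlanAsJSON(plan);
* System.out.println(json);
* }</pre>
*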
* @param plan The dataflow plan.
* @return The dataflow's execution plan, as a JSON string.
* @throws Exception Thrown if the optimization process that creates the execution plan failed.
*/
@Override
public String getOptimizerPlanAsJSON(Plan plan) throws Exception {
final int parallelism = plan.getDefaultParallelism() == -1 ? 1 : plan.getDefaultParallelism();
Optimizer pc = new Optimizer(new DataStatistics(), this.configuration);
pc.setDefaultParallelism(parallelism);
OptimizedPlan op = pc.compile(plan);
return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(op);
}
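/**
* Ends the job session with the given JobID by asking the leading JobManager of the embedded
* cluster to remove the cached job. This is a no-op if the executor is not running.
*
* @param jobID The JobID of the job whose session shall be ended.
*/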
@Override
public void endSession(JobID jobID) throws Exception {
LocalFlinkMiniCluster flink = this.flink;
if (flink != null) {
ActorGateway leaderGateway = flink.getLeaderGateway(AkkaUtils.getDefaultTimeout());
leaderGateway.tell(new JobManagerMessages.RemoveCachedJob(jobID));
}
}
private Configuration createConfiguration() {
Configuration configuration = new Configuration();
configuration.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, getTaskManagerNumSlots());
configuration.setBoolean(ConfigConstants.FILESYSTEM_DEFAULT_OVERWRITE_KEY, isDefaultOverwriteFiles());
return configuration;
}
// --------------------------------------------------------------------------------------------
// Static variants that internally bring up an instance and shut it down after the execution
// --------------------------------------------------------------------------------------------
/**
* Executes the given program.
*
* @param pa The program.
* @param args The parameters.
* @return The execution result of the program.
*
* @throws Exception Thrown if either the startup of the local execution context or the execution
* of the program caused an exception.
*/
public static JobExecutionResult execute(Program pa, String... args) throws Exception {
return execute(pa.getPlan(args));
}
/**
* Executes the given dataflow plan.
*
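* <p>A minimal sketch (assuming {@code plan} is a dataflow plan built elsewhere):</p>
*
* <pre>{@code
* JobExecutionResult result = LocalExecutor.execute(plan);
* System.out.println("net runtime: " + result.getNetRuntime() + " ms");
* }</pre>
*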
* @param plan The dataflow plan.
* @return The execution result.
*
* @throws Exception Thrown if either the startup of the local execution context or the execution
* of the program caused an exception.
*/
public static JobExecutionResult execute(Plan plan) throws Exception {
return new LocalExecutor().executePlan(plan);
}
/**
* Creates a JSON representation of the given dataflow's execution plan.
*
* @param plan The dataflow plan.
* @return The dataflow's execution plan, as a JSON string.
* @throws Exception Thrown if the optimization process that creates the execution plan failed.
*/
public static String optimizerPlanAsJSON(Plan plan) throws Exception {
final int parallelism = plan.getDefaultParallelism() == -1 ? 1 : plan.getDefaultParallelism();
Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
pc.setDefaultParallelism(parallelism);
OptimizedPlan op = pc.compile(plan);
return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(op);
}
/**
* Creates a JSON representation of the given dataflow plan.
*
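* <p>A small sketch (assuming {@code plan} is a dataflow plan built elsewhere); unlike
* {@link #optimizerPlanAsJSON(Plan)}, this dumps the plan before optimization:</p>
*
* <pre>{@code
* String json = LocalExecutor.getPlanAsJSON(plan);
* }</pre>
*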
* @param plan The dataflow plan.
* @return The dataflow plan (prior to optimization) as a JSON string.
*/
public static String getPlanAsJSON(Plan plan) {
List<DataSinkNode> sinks = Optimizer.createPreOptimizedPlan(plan);
return new PlanJSONDumpGenerator().getPactPlanAsJSON(sinks);
}
}