/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| package org.apache.pig.backend.executionengine; |
| |
| import java.io.File; |
| import java.io.IOException; |
| import java.io.PrintStream; |
| import java.util.Properties; |
| |
| import org.apache.pig.backend.BackendException; |
| import org.apache.pig.backend.datastorage.DataStorage; |
| import org.apache.pig.classification.InterfaceAudience; |
| import org.apache.pig.classification.InterfaceStability; |
| import org.apache.pig.impl.PigContext; |
| import org.apache.pig.impl.logicalLayer.FrontendException; |
| import org.apache.pig.impl.plan.PlanException; |
| import org.apache.pig.impl.plan.VisitorException; |
| import org.apache.pig.impl.streaming.ExecutableManager; |
| import org.apache.pig.newplan.logical.relational.LogicalPlan; |
| import org.apache.pig.tools.pigstats.PigStats; |
| import org.apache.pig.tools.pigstats.ScriptState; |
| |
| /** |
| * |
| * The main interface bridging the front end and back end of Pig. This allows Pig |
| * to be ran on multiple Execution Engines, and not being limited to only Hadoop |
| * MapReduce. The ExecutionEngines must support the following methods as these |
| * are all the access points for the Pig frontend for processing. Traditionally |
| * there is one ExecutionEngine created per processing job, but this is not necessary. |
| * The ExecutionEngine instance comes from the ExecType, and it can choose to reuse |
| * ExecutionEngine instances. All specifications for methods are listed below |
| * as well as expected behavior, and the ExecutionEngine must conform to these. |
| * |
| */ |
| |
| |
| @InterfaceAudience.Public |
| @InterfaceStability.Evolving |
| public interface ExecutionEngine { |
| |
| /** |
| * This method is responsible for the initialization of the ExecutionEngine. |
| * All necessary setup tasks and configuration should execute in the init() |
| * method. This method will be called via the PigContext object. |
| */ |
| public void init() throws ExecException; |
| |
| /** |
| * Responsible for updating the properties for the ExecutionEngine. The |
| * update may require reinitialization of the engine, perhaps through |
| * another call to init() if appropriate. This decision is delegated to the |
| * ExecutionEngine -- that is, the caller will not call init() after |
| * updating the properties. |
| * |
| * The Properties passed in should replace any configuration that occurred |
| * from previous Properties object. The Properties object should also be |
| * updated to reflect the deprecation/modifications that the ExecutionEngine |
| * may trigger. |
| * |
| * @param newConfiguration -- Properties object holding all configuration vals |
| */ |
| public void setConfiguration(Properties newConfiguration) |
| throws ExecException; |
| |
| /** |
| * Responsible for setting a specific property and value. This method may be |
| * called as a result of a user "SET" command in the script or elsewhere in |
| * Pig to set certain properties. |
| * |
| * The properties object of the PigContext should be updated with the |
| * property and value with deprecation/other configuration done by the |
| * ExecutionEngine reflected. The ExecutionEngine should also update its |
| * internal configuration view as well. |
| * |
| * @param property to update |
| * @param value to set for property |
| */ |
| public void setProperty(String property, String value); |
| |
| /** |
| * Returns the Properties representation of the ExecutionEngine |
| * configuration. The Properties object returned does not have to be the |
| * same object between distinct calls to getConfiguration(). The ExecutionEngine |
| * may create a new Properties object populated with all the properties |
| * each time. |
| */ |
| public Properties getConfiguration(); |
| |
| /** |
| * This method is responsible for the actual execution of a LogicalPlan. No |
| * assumptions is made about the architecture of the ExecutionEngine, except |
| * that it is capable of executing the LogicalPlan representation of a |
| * script. The ExecutionEngine should take care of all cleanup after |
| * executing the logical plan and returns an implementation of PigStats that |
| * contains the relevant information/statistics of the execution of the |
| * script. |
| * |
| * @param lp -- plan to compile |
| * @param grpName -- group name for submission |
| * @param pc -- context for execution |
| * @throws ExecException |
| */ |
| public PigStats launchPig(LogicalPlan lp, String grpName, PigContext pc) |
| throws FrontendException, ExecException; |
| |
| /** |
| * This method handles the backend processing of the Explain command. Once |
| * again, no assumptions is made about the architecture of the |
| * ExecutionEngine, except that it is capable of "explaining" the |
| * LogicalPlan representation of a script. The ExecutionEngine should print |
| * all of it's explain statements in the PrintStream provided UNLESS the |
| * File object is NOT null. In that case, the file is the directory for |
| * which the ExecutionEngine must write out the explain statements into |
| * semantically distinct files. For example, if the ExecutionEngine operates |
| * on a PhysicalPlan and an ExecutionPlan then there would be two separate |
| * files detailing each. The suffix param indicates the suffix of each of |
| * these file names. |
| * |
| * @param lp -- plan to explain |
| * @param pc -- context for explain processing |
| * @param ps -- print stream to write all output to (if dir param is null) |
| * @param format -- format to print explain |
| * @param verbose |
| * @param dir -- directory to write output to. if not null, write to files |
| * @param suffix -- if writing to files, suffix to be used for each file |
| * |
| * |
| * @throws PlanException |
| * @throws VisitorException |
| * @throws IOException |
| */ |
| public void explain(LogicalPlan lp, PigContext pc, PrintStream ps, |
| String format, boolean verbose, File dir, String suffix) |
| throws PlanException, VisitorException, IOException; |
| |
| /** |
| * Returns the DataStorage the ExecutionEngine is using. |
| * |
| * @return DataStorage the ExecutionEngine is using. |
| */ |
| public DataStorage getDataStorage(); |
| |
| /** |
| * Creates a ScriptState object which will be accessible as a ThreadLocal |
| * variable inside the ScriptState class. This method is called when first |
| * initializing the ScriptState as to delegate to the ExecutionEngine the |
| * version of ScriptState to use to manage the execution at hand. |
| * |
| * @return ScriptState object to manage execution of the script |
| */ |
| public ScriptState instantiateScriptState(); |
| |
| /** |
| * Creates a PigStats object which will be accessible as a ThreadLocal |
| * variable inside the PigStats class. This method is called when first |
| * initializing the PigStats. |
| * |
| * @return PigStats object. |
| */ |
| public PigStats instantiatePigStats(); |
| |
| /** |
| * Returns the ExecutableManager to be used in Pig Streaming. |
| * |
| * @return ExecutableManager to be used in Pig Streaming. |
| */ |
| public ExecutableManager getExecutableManager(); |
| |
| /** |
| * This method is called when user requests to kill all jobs |
| * associated with the execution engine |
| * |
| * @throws BackendException |
| */ |
| public void kill() throws BackendException; |
| |
| /** |
| * This method is called when a user requests to kill a job associated with |
| * the given job id. If it is not possible for a user to kill a job, throw a |
| * exception. It is imperative for the job id's being displayed to be unique |
| * such that the correct jobs are being killed when the user supplies the |
| * id. |
| * |
| * @throws BackendException |
| */ |
| public void killJob(String jobID) throws BackendException; |
| |
| /** |
| * Perform any cleanup operation |
| */ |
| public void destroy(); |
| |
| } |