blob: 7e1da876df12dc02c90f3c2e137695f051bd7333 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.executionengine;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Properties;
import org.apache.pig.backend.BackendException;
import org.apache.pig.backend.datastorage.DataStorage;
import org.apache.pig.classification.InterfaceAudience;
import org.apache.pig.classification.InterfaceStability;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.streaming.ExecutableManager;
import org.apache.pig.newplan.logical.relational.LogicalPlan;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.ScriptState;
/**
 * Bridge between the Pig front end and a back end.
 * <p>
 * Going through this interface lets Pig run on several execution engines
 * rather than being tied to Hadoop MapReduce. The methods declared here are
 * the complete set of entry points the Pig front end uses for processing, so
 * every engine has to support all of them and conform to the behavior
 * described on each one.
 * <p>
 * Traditionally one ExecutionEngine is created per processing job, although
 * that is not a requirement: instances are obtained from the ExecType, which
 * is free to hand out a reused instance.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public interface ExecutionEngine {

    /**
     * Initializes the ExecutionEngine. Every setup and configuration task the
     * engine needs should be carried out here. The call is made via the
     * PigContext object.
     *
     * @throws ExecException
     */
    void init()
            throws ExecException;

    /**
     * Updates the properties of the ExecutionEngine.
     * <p>
     * Whether the update requires the engine to be reinitialized (for example
     * by invoking init() again) is left to the ExecutionEngine; the caller
     * will not call init() after updating the properties.
     * <p>
     * The supplied Properties should replace any configuration derived from a
     * previously supplied Properties object. The Properties object should in
     * turn be updated to reflect any deprecation/modifications the
     * ExecutionEngine triggers.
     *
     * @param newConfiguration -- Properties object holding all configuration
     *            values
     * @throws ExecException
     */
    void setConfiguration(Properties newConfiguration)
            throws ExecException;

    /**
     * Sets a single property to the given value. The call may originate from
     * a user "SET" command in a script, or from elsewhere in Pig.
     * <p>
     * The properties object of the PigContext should be updated with the
     * property and value, reflecting any deprecation or other configuration
     * handling done by the ExecutionEngine. The ExecutionEngine should update
     * its own internal view of the configuration as well.
     *
     * @param property -- property to update
     * @param value -- value to set for the property
     */
    void setProperty(String property, String value);

    /**
     * Returns the ExecutionEngine configuration as a Properties object.
     * <p>
     * Distinct calls are not required to return the same object; the
     * ExecutionEngine may build a new Properties object populated with all
     * the properties on each call.
     *
     * @return Properties representation of the ExecutionEngine configuration
     */
    Properties getConfiguration();

    /**
     * Performs the actual execution of a LogicalPlan.
     * <p>
     * Nothing is assumed about the architecture of the ExecutionEngine beyond
     * its ability to execute the LogicalPlan representation of a script. The
     * ExecutionEngine should take care of all cleanup after executing the
     * plan, and returns a PigStats implementation containing the relevant
     * information/statistics of the execution of the script.
     *
     * @param lp -- plan to compile
     * @param grpName -- group name for submission
     * @param pc -- context for execution
     * @return PigStats implementation describing the execution of the script
     * @throws FrontendException
     * @throws ExecException
     */
    PigStats launchPig(LogicalPlan lp, String grpName, PigContext pc)
            throws FrontendException, ExecException;

    /**
     * Handles the backend processing of the Explain command.
     * <p>
     * As with execution, nothing is assumed about the architecture of the
     * ExecutionEngine beyond its ability to "explain" the LogicalPlan
     * representation of a script.
     * <p>
     * The ExecutionEngine should print all of its explain statements to the
     * supplied PrintStream UNLESS the File object is NOT null. In that case
     * the file is a directory into which the ExecutionEngine must write the
     * explain statements as semantically distinct files. For example, an
     * ExecutionEngine that operates on a PhysicalPlan and an ExecutionPlan
     * would produce two separate files, one detailing each. The suffix
     * parameter gives the suffix of each of these file names.
     *
     * @param lp -- plan to explain
     * @param pc -- context for explain processing
     * @param ps -- print stream to write all output to (if dir param is null)
     * @param format -- format to print explain
     * @param verbose
     * @param dir -- directory to write output to. if not null, write to files
     * @param suffix -- if writing to files, suffix to be used for each file
     * @throws PlanException
     * @throws VisitorException
     * @throws IOException
     */
    void explain(LogicalPlan lp, PigContext pc, PrintStream ps,
            String format, boolean verbose, File dir, String suffix)
            throws PlanException, VisitorException, IOException;

    /**
     * Returns the DataStorage the ExecutionEngine is using.
     *
     * @return DataStorage in use by the ExecutionEngine
     */
    DataStorage getDataStorage();

    /**
     * Creates a ScriptState object, which will be accessible as a ThreadLocal
     * variable inside the ScriptState class. It is called when the
     * ScriptState is first initialized, so that the ExecutionEngine decides
     * which version of ScriptState manages the execution at hand.
     *
     * @return ScriptState object to manage execution of the script
     */
    ScriptState instantiateScriptState();

    /**
     * Creates a PigStats object, which will be accessible as a ThreadLocal
     * variable inside the PigStats class. It is called when the PigStats is
     * first initialized.
     *
     * @return PigStats object
     */
    PigStats instantiatePigStats();

    /**
     * Returns the ExecutableManager to be used in Pig Streaming.
     *
     * @return ExecutableManager to be used in Pig Streaming
     */
    ExecutableManager getExecutableManager();

    /**
     * Called when the user requests to kill all jobs associated with the
     * execution engine.
     *
     * @throws BackendException
     */
    void kill()
            throws BackendException;

    /**
     * Called when a user requests to kill the job associated with the given
     * job id. If it is not possible for a user to kill a job, throw an
     * exception. The job ids being displayed must be unique, so that the
     * correct job is killed when the user supplies an id.
     *
     * @param jobID -- id of the job to kill
     * @throws BackendException
     */
    void killJob(String jobID)
            throws BackendException;

    /**
     * Performs any cleanup operation.
     */
    void destroy();
}