| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.pig; |
| |
| import java.io.BufferedReader; |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.PrintStream; |
| import java.io.StringReader; |
| import java.lang.reflect.Constructor; |
| import java.lang.reflect.Method; |
| import java.net.URL; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Deque; |
| import java.util.Enumeration; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.LinkedList; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Properties; |
| import java.util.Queue; |
| import java.util.Set; |
| import java.util.concurrent.atomic.AtomicInteger; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.io.compress.BZip2Codec; |
| import org.apache.log4j.Level; |
| import org.apache.log4j.Logger; |
| import org.apache.pig.backend.datastorage.ContainerDescriptor; |
| import org.apache.pig.backend.datastorage.DataStorage; |
| import org.apache.pig.backend.datastorage.ElementDescriptor; |
| import org.apache.pig.backend.executionengine.ExecException; |
| import org.apache.pig.backend.executionengine.ExecJob; |
| import org.apache.pig.backend.executionengine.ExecJob.JOB_STATUS; |
| import org.apache.pig.backend.hadoop.PigATSClient; |
| import org.apache.pig.backend.hadoop.executionengine.HJob; |
| import org.apache.pig.builtin.PigStorage; |
| import org.apache.pig.classification.InterfaceAudience; |
| import org.apache.pig.classification.InterfaceStability; |
| import org.apache.pig.data.DataBag; |
| import org.apache.pig.data.Tuple; |
| import org.apache.pig.impl.PigContext; |
| import org.apache.pig.impl.io.FileLocalizer; |
| import org.apache.pig.impl.io.FileLocalizer.FetchFileRet; |
| import org.apache.pig.impl.io.compress.BZip2CodecWithExtensionBZ; |
| import org.apache.pig.impl.logicalLayer.FrontendException; |
| import org.apache.pig.impl.logicalLayer.schema.Schema; |
| import org.apache.pig.impl.streaming.StreamingCommand; |
| import org.apache.pig.impl.util.LogUtils; |
| import org.apache.pig.impl.util.PropertiesUtil; |
| import org.apache.pig.impl.util.UDFContext; |
| import org.apache.pig.impl.util.UriUtil; |
| import org.apache.pig.impl.util.Utils; |
| import org.apache.pig.newplan.DependencyOrderWalker; |
| import org.apache.pig.newplan.Operator; |
| import org.apache.pig.newplan.logical.Util; |
| import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan; |
| import org.apache.pig.newplan.logical.expression.LogicalExpressionVisitor; |
| import org.apache.pig.newplan.logical.expression.ScalarExpression; |
| import org.apache.pig.newplan.logical.optimizer.AllExpressionVisitor; |
| import org.apache.pig.newplan.logical.relational.LOForEach; |
| import org.apache.pig.newplan.logical.relational.LOLoad; |
| import org.apache.pig.newplan.logical.relational.LOStore; |
| import org.apache.pig.newplan.logical.relational.LogicalPlan; |
| import org.apache.pig.newplan.logical.relational.LogicalPlanData; |
| import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator; |
| import org.apache.pig.newplan.logical.relational.LogicalSchema; |
| import org.apache.pig.parser.QueryParserDriver; |
| import org.apache.pig.parser.QueryParserUtils; |
| import org.apache.pig.pen.ExampleGenerator; |
| import org.apache.pig.scripting.ScriptEngine; |
| import org.apache.pig.tools.grunt.GruntParser; |
| import org.apache.pig.tools.pigstats.EmptyPigStats; |
| import org.apache.pig.tools.pigstats.JobStats; |
| import org.apache.pig.tools.pigstats.OutputStats; |
| import org.apache.pig.tools.pigstats.PigStats; |
| import org.apache.pig.tools.pigstats.PigStats.JobGraph; |
| import org.apache.pig.tools.pigstats.ScriptState; |
| import org.apache.pig.validator.BlackAndWhitelistFilter; |
| import org.apache.pig.validator.PigCommandFilter; |
| |
| import com.google.common.annotations.VisibleForTesting; |
| |
| /** |
| * |
| * A class for Java programs to connect to Pig. Typically a program will create a PigServer |
| * instance. The programmer then registers queries using registerQuery() and |
| * retrieves results using openIterator() or store(). After doing so, the |
| * shutdown() method should be called to free any resources used by the current |
| * PigServer instance. Not doing so could result in a memory leak. |
| * |
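| * <p>A minimal usage sketch (the input path and alias names below are |
| * illustrative, not prescribed by this API): |
| * <pre> |
| * PigServer pigServer = new PigServer(ExecType.LOCAL); |
| * pigServer.registerQuery("A = load 'input.txt' as (line:chararray);"); |
| * Iterator&lt;Tuple&gt; it = pigServer.openIterator("A"); |
| * while (it.hasNext()) { |
| *     System.out.println(it.next()); |
| * } |
| * pigServer.shutdown(); |
| * </pre> |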
| */ |
| @InterfaceAudience.Public |
| @InterfaceStability.Stable |
| public class PigServer { |
| |
| protected final Log log = LogFactory.getLog(getClass()); |
| |
| public static final String PRETTY_PRINT_SCHEMA_PROPERTY = "pig.pretty.print.schema"; |
| private static final String PIG_LOCATION_CHECK_STRICT = "pig.location.check.strict"; |
| |
| /* |
| * The data structure to support grunt shell operations. |
| * The grunt shell can only work on one graph at a time. |
| * If a script is contained inside another script, the grunt |
| * shell first saves the current graph on the stack and works |
| * on a new graph. After the nested script is done, the grunt |
| * shell pops the saved graph off the stack and continues working on it. |
| */ |
| protected final Deque<Graph> graphs = new LinkedList<Graph>(); |
| |
| /* |
| * The current Graph the grunt shell is working on. |
| */ |
| private Graph currDAG; |
| |
| protected final PigContext pigContext; |
| |
| private String jobName; |
| |
| private String jobPriority; |
| |
| private final static AtomicInteger scopeCounter = new AtomicInteger(0); |
| |
| protected final String scope = constructScope(); |
| |
| private boolean validateEachStatement = false; |
| private boolean skipParseInRegisterForBatch = false; |
| |
| private final BlackAndWhitelistFilter filter; |
| |
| private String constructScope() { |
| // scope serves, for now, as a session id |
| |
| // String user = System.getProperty("user.name", "DEFAULT_USER_ID"); |
| // String date = (new Date()).toString(); |
| |
| // scope is not really used in the system right now. It will, |
| // however, make your explain statements look lengthy if set to |
| // username-date. For now let's keep the scope simple; if a real |
| // scope is needed again, we might need to update all the |
| // operators to not include scope in their name(). |
| return "" + scopeCounter.incrementAndGet(); |
| } |
| |
| @VisibleForTesting |
| public static void resetScope() { |
| scopeCounter.set(0); |
| } |
| |
| /** |
| * @param execTypeString can be 'mapreduce' or 'local'. Local mode will |
| * use Hadoop's local job runner to execute the job on the local machine. |
| * Mapreduce mode will connect to a cluster to execute the job. If |
| * execTypeString is neither of these two, Pig will deduce the matching |
| * ExecutionEngine, if it is on the classpath, and use it for the backend execution. |
| * @throws ExecException |
| * @throws IOException |
| */ |
| public PigServer(String execTypeString) throws ExecException, IOException { |
| this(addExecTypeProperty(PropertiesUtil.loadDefaultProperties(), execTypeString)); |
| } |
| |
| public PigServer(String execTypeString, Properties properties) throws ExecException, IOException { |
| this(addExecTypeProperty(properties, execTypeString)); |
| } |
| |
| public PigServer(Properties properties) throws ExecException, IOException { |
| this(new PigContext(properties)); |
| } |
| |
| private static Properties addExecTypeProperty(Properties properties, String execType) { |
| properties.setProperty("exectype", execType); |
| return properties; |
| } |
| |
| /** |
| * @param execType execution type to start the engine. Local mode will |
| * use Hadoop's local job runner to execute the job on the local machine. |
| * Mapreduce mode will connect to a cluster to execute the job. |
| * @throws ExecException |
| */ |
| public PigServer(ExecType execType) throws ExecException { |
| this(execType, PropertiesUtil.loadDefaultProperties()); |
| } |
| |
| public PigServer(ExecType execType, Properties properties) throws ExecException { |
| this(new PigContext(execType, properties)); |
| } |
| |
| public PigServer(ExecType execType, Configuration conf) throws ExecException { |
| this(new PigContext(execType, conf)); |
| } |
| |
| public PigServer(PigContext context) throws ExecException { |
| this(context, true); |
| } |
| |
| public PigServer(PigContext context, boolean connect) throws ExecException { |
| this.pigContext = context; |
| currDAG = new Graph(false); |
| |
| jobName = pigContext.getProperties().getProperty( |
| PigContext.JOB_NAME, |
| PigContext.JOB_NAME_PREFIX + ":DefaultJobName"); |
| |
| if (connect) { |
| pigContext.connect(); |
| } |
| |
| this.filter = new BlackAndWhitelistFilter(this); |
| |
| addHadoopProperties(); |
| addJarsFromProperties(); |
| markPredeployedJarsFromProperties(); |
| |
| if (ScriptState.get() == null) { |
| // If Pig was started via command line, ScriptState should have been |
| // already initialized in Main. If so, we should not overwrite it. |
| ScriptState.start(pigContext.getExecutionEngine().instantiateScriptState()); |
| } |
| PigStats.start(pigContext.getExecutionEngine().instantiatePigStats()); |
| |
| // log the ATS event, which includes the caller context |
| String auditId = PigATSClient.getPigAuditId(pigContext); |
| String callerId = (String)pigContext.getProperties().get(PigConfiguration.PIG_LOG_TRACE_ID); |
| log.info("Pig Script ID for the session: " + auditId); |
| if (callerId != null) { |
| log.info("Caller ID for session: " + callerId); |
| } |
| if (Boolean.parseBoolean(pigContext.getProperties() |
| .getProperty(PigConfiguration.PIG_ATS_ENABLED))) { |
| if (Boolean.parseBoolean(pigContext.getProperties() |
| .getProperty("yarn.timeline-service.enabled", "false"))) { |
| PigATSClient.ATSEvent event = new PigATSClient.ATSEvent(auditId, callerId); |
| try { |
| PigATSClient.getInstance().logEvent(event); |
| } catch (Exception e) { |
| log.warn("Error posting to ATS: ", e); |
| } |
| } else { |
| log.warn("ATS is disabled since" |
| + " yarn.timeline-service.enabled set to false"); |
| } |
| |
| } |
| |
| // set hdfs caller context |
| Class<?> callerContextClass = null; |
| try { |
| callerContextClass = Class.forName("org.apache.hadoop.ipc.CallerContext"); |
| } catch (ClassNotFoundException e) { |
| // If pre-Hadoop 2.8.0, skip setting CallerContext |
| } |
| if (callerContextClass != null) { |
| try { |
| // Reflection for the following code since it is only available since hadoop 2.8.0: |
| // CallerContext hdfsContext = new CallerContext.Builder(auditId).build(); |
| // CallerContext.setCurrent(hdfsContext); |
| Class<?> callerContextBuilderClass = Class.forName("org.apache.hadoop.ipc.CallerContext$Builder"); |
| Constructor<?> callerContextBuilderConstruct = callerContextBuilderClass.getConstructor(String.class); |
| Object builder = callerContextBuilderConstruct.newInstance(auditId); |
| Method builderBuildMethod = builder.getClass().getMethod("build"); |
| Object hdfsContext = builderBuildMethod.invoke(builder); |
| Method callerContextSetCurrentMethod = callerContextClass.getMethod("setCurrent", hdfsContext.getClass()); |
| callerContextSetCurrentMethod.invoke(callerContextClass, hdfsContext); |
| } catch (Exception e) { |
| // Should not happen unless the API changes in a future Hadoop release |
| throw new ExecException(e); |
| } |
| } |
| } |
| |
| private void addHadoopProperties() throws ExecException { |
| // For BZip input on hadoop 0.23/2.X |
| // with PIG_BZIP_USE_HADOOP_INPUTFORMAT turned on, |
| // PigTextInputFormat depends on hadoop's TextInputFormat |
| // for handling bzip2 input. One problem is that it only recognizes |
| // 'bz2' as an extension, not 'bz'. |
| // Adding custom BZip2 codec that returns 'bz' as extension |
| // for backward compatibility. |
| String codecs = |
| pigContext.getProperties().getProperty("io.compression.codecs"); |
| |
| if( codecs != null |
| && codecs.contains(BZip2Codec.class.getCanonicalName() ) ) { |
| pigContext.getProperties().setProperty("io.compression.codecs", |
| codecs + "," |
| + BZip2CodecWithExtensionBZ.class.getCanonicalName() ); |
| } |
| } |
| |
| private void addJarsFromProperties() throws ExecException { |
| //add jars from properties to extraJars |
| String jar_str = pigContext.getProperties().getProperty("pig.additional.jars"); |
| if (jar_str==null) { |
| jar_str = ""; |
| } |
| jar_str = jar_str.replaceAll(File.pathSeparator, ","); |
| if (!jar_str.isEmpty()) { |
| jar_str += ","; |
| } |
| |
| String jar_str_comma = pigContext.getProperties().getProperty("pig.additional.jars.uris"); |
| if (jar_str_comma!=null && !jar_str_comma.isEmpty()) { |
| jar_str = jar_str + jar_str_comma; |
| } |
| |
| if(jar_str != null && !jar_str.isEmpty()){ |
| // At this point the jar list has already been normalized to a |
| // comma-separated string (File.pathSeparator occurrences were |
| // replaced with commas above), so split on ",". |
| for(String jar : jar_str.split(",")){ |
| try { |
| registerJar(jar); |
| } catch (IOException e) { |
| int errCode = 4010; |
| String msg = |
| "Failed to register jar :" + jar + ". Caught exception."; |
| throw new ExecException( |
| msg, |
| errCode, |
| PigException.USER_ENVIRONMENT, |
| e |
| ); |
| } |
| } |
| } |
| } |
| |
| private void markPredeployedJarsFromProperties() throws ExecException { |
| // mark jars as predeployed from properties |
| String jar_str = pigContext.getProperties().getProperty("pig.predeployed.jars"); |
| |
| if(jar_str != null){ |
| // Use File.pathSeparator (":" on Linux, ";" on Windows) |
| // to correctly handle path aggregates as they are represented |
| // on the Operating System. |
| for(String jar : jar_str.split(File.pathSeparator)){ |
| if (jar.length() > 0) { |
| pigContext.markJarAsPredeployed(jar); |
| } |
| } |
| } |
| } |
| |
| public PigContext getPigContext(){ |
| return pigContext; |
| } |
| |
| /** |
| * Get the current DAG, i.e. the graph the current session is working on. |
| * |
| * @return the current {@link Graph} |
| */ |
| public Graph getCurrentDAG() { |
| return this.currDAG; |
| } |
| /** |
| * Set the logging level to DEBUG. |
| */ |
| public void debugOn() { |
| Logger.getLogger("org.apache.pig").setLevel(Level.DEBUG); |
| pigContext.getLog4jProperties().setProperty("log4j.logger.org.apache.pig", Level.DEBUG.toString()); |
| } |
| |
| /** |
| * Set the logging level to the default. |
| */ |
| public void debugOff() { |
| Logger.getLogger("org.apache.pig").setLevel(pigContext.getDefaultLogLevel()); |
| pigContext.getLog4jProperties().setProperty("log4j.logger.org.apache.pig", pigContext.getDefaultLogLevel().toString()); |
| } |
| |
| /** |
| * Set the default parallelism for this job |
| * @param p default number of reducers to use for this job. |
| */ |
| public void setDefaultParallel(int p) { |
| pigContext.defaultParallel = p; |
| } |
| |
| /** |
| * Starts batch execution mode. |
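| * <p>In batch mode, statements are accumulated and nothing is executed |
| * until {@link #executeBatch()} is called. A minimal sketch (paths are |
| * illustrative): |
| * <pre> |
| * pigServer.setBatchOn(); |
| * pigServer.registerQuery("A = load 'in';"); |
| * pigServer.registerQuery("store A into 'out';"); |
| * List&lt;ExecJob&gt; jobs = pigServer.executeBatch(); |
| * </pre> |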
| */ |
| public void setBatchOn() { |
| log.debug("Create a new graph."); |
| |
| if (currDAG != null) { |
| graphs.push(currDAG); |
| } |
| currDAG = new Graph(true); |
| } |
| |
| /** |
| * Retrieve the current execution mode. |
| * |
| * @return true if the execution mode is batch; false otherwise. |
| */ |
| public boolean isBatchOn() { |
| // Batch is on when there is at least one graph on the |
| // stack. That gives the right response even if multiquery was |
| // turned off. |
| return graphs.size() > 0; |
| } |
| |
| /** |
| * Returns whether the current batch is empty, i.e. has no stores to process. |
| * @return true if there are no stores to process in the current |
| * batch, false otherwise. |
| * @throws FrontendException if batch mode has not been turned on |
| */ |
| public boolean isBatchEmpty() throws FrontendException { |
| if (currDAG == null) { |
| int errCode = 1083; |
| String msg = "setBatchOn() must be called first."; |
| throw new FrontendException(msg, errCode, PigException.INPUT); |
| } |
| |
| return currDAG.isBatchEmpty(); |
| } |
| |
| /** |
| * This method parses the script and builds the LogicalPlan. It should |
| * be followed by {@link PigServer#executeBatch(boolean)} with |
| * <tt>false</tt> as the argument. Do not use {@link PigServer#executeBatch()} after |
| * calling this method, as that will re-parse and re-build the script. |
| * |
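| * <p>A minimal sketch of the intended call sequence (paths are illustrative): |
| * <pre> |
| * pigServer.setBatchOn(); |
| * pigServer.registerQuery("A = load 'in';"); |
| * pigServer.registerQuery("store A into 'out';"); |
| * pigServer.parseAndBuild(); |
| * pigServer.executeBatch(false); // false: skip re-parsing and re-building |
| * </pre> |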
| * @throws IOException |
| */ |
| public void parseAndBuild() throws IOException { |
| if (currDAG == null || !isBatchOn()) { |
| int errCode = 1083; |
| String msg = "setBatchOn() must be called first."; |
| throw new FrontendException(msg, errCode, PigException.INPUT); |
| } |
| currDAG.parseQuery(); |
| currDAG.buildPlan( null ); |
| } |
| |
| /** |
| * Submits a batch of Pig commands for execution. |
| * |
| * @return list of jobs being executed |
| * @throws IOException |
| */ |
| public List<ExecJob> executeBatch() throws IOException { |
| return executeBatch(true); |
| } |
| |
| /** |
| * Submits a batch of Pig commands for execution. Parsing and building of |
| * the script should be skipped if {@link PigServer#parseAndBuild()} was |
| * called before; in that case, pass <tt>false</tt> as the argument. |
| * |
| * @param parseAndBuild whether to parse and build the script before executing it |
| * @return list of jobs being executed |
| * @throws IOException |
| */ |
| public List<ExecJob> executeBatch(boolean parseAndBuild) throws IOException { |
| if (parseAndBuild) { |
| parseAndBuild(); |
| } |
| |
| PigStats stats = execute(); |
| |
| return getJobs(stats); |
| } |
| |
| /** |
| * Retrieves a list of Job objects from the PigStats object. |
| * @param stats the PigStats to extract jobs from |
| * @return A list of ExecJob objects |
| */ |
| protected List<ExecJob> getJobs(PigStats stats) { |
| LinkedList<ExecJob> jobs = new LinkedList<ExecJob>(); |
| if (stats instanceof EmptyPigStats) { |
| HJob job = new HJob(HJob.JOB_STATUS.COMPLETED, pigContext, stats.result(null) |
| .getPOStore(), null); |
| jobs.add(job); |
| return jobs; |
| } |
| JobGraph jGraph = stats.getJobGraph(); |
| Iterator<JobStats> iter = jGraph.iterator(); |
| while (iter.hasNext()) { |
| JobStats js = iter.next(); |
| for (OutputStats output : js.getOutputs()) { |
| if (js.isSuccessful()) { |
| jobs.add(new HJob(HJob.JOB_STATUS.COMPLETED, pigContext, output |
| .getPOStore(), output.getAlias(), stats)); |
| } else { |
| HJob hjob = new HJob(HJob.JOB_STATUS.FAILED, pigContext, output |
| .getPOStore(), output.getAlias(), stats); |
| hjob.setException(js.getException()); |
| jobs.add(hjob); |
| } |
| } |
| } |
| return jobs; |
| } |
| |
| /** |
| * Discards a batch of Pig commands. |
| * |
| * @throws FrontendException |
| */ |
| public void discardBatch() throws FrontendException { |
| if (currDAG == null || !isBatchOn()) { |
| int errCode = 1083; |
| String msg = "setBatchOn() must be called first."; |
| throw new FrontendException(msg, errCode, PigException.INPUT); |
| } |
| |
| currDAG = graphs.pop(); |
| } |
| |
| /** |
| * Add a path to be skipped while automatically shipping binaries for |
| * streaming. |
| * |
| * @param path path to be skipped |
| */ |
| public void addPathToSkip(String path) { |
| pigContext.addPathToSkip(path); |
| } |
| |
| /** |
| * Defines an alias for the given function spec. This |
| * is useful for functions that require arguments to the |
| * constructor. |
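| * <p>A hedged sketch (the UDF class below is hypothetical): |
| * <pre> |
| * pigServer.registerFunction("MyUpper", |
| *     new FuncSpec("com.example.udf.Upper('en_US')")); |
| * </pre> |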
| * |
| * @param function - the new function alias to define. |
| * @param funcSpec - the FuncSpec object representing the name of |
| * the function class and any arguments to constructor. |
| */ |
| public void registerFunction(String function, FuncSpec funcSpec) { |
| pigContext.registerFunction(function, funcSpec); |
| } |
| |
| /** |
| * Defines an alias for the given streaming command. |
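| * <p>A hedged sketch, assuming StreamingCommand's (PigContext, String[]) |
| * constructor (verify against your Pig version; the perl script name is |
| * illustrative): |
| * <pre> |
| * StreamingCommand cmd = new StreamingCommand(pigServer.getPigContext(), |
| *     new String[] { "perl", "myscript.pl" }); |
| * pigServer.registerStreamingCommand("myCmd", cmd); |
| * </pre> |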
| * |
| * @param commandAlias - the new command alias to define |
| * @param command - streaming command to be executed |
| */ |
| public void registerStreamingCommand(String commandAlias, StreamingCommand command) { |
| pigContext.registerStreamCmd(commandAlias, command); |
| } |
| |
| private URL locateJarFromResources(String jarName) throws IOException { |
| Enumeration<URL> urls = ClassLoader.getSystemResources(jarName); |
| URL resourceLocation = null; |
| |
| if (urls.hasMoreElements()) { |
| resourceLocation = urls.nextElement(); |
| } |
| |
| if (urls.hasMoreElements()) { |
| StringBuffer sb = new StringBuffer("Found multiple resources that match "); |
| sb.append(jarName); |
| sb.append(": "); |
| sb.append(resourceLocation); |
| |
| while (urls.hasMoreElements()) { |
| sb.append(urls.nextElement()); |
| sb.append("; "); |
| } |
| |
| log.debug(sb.toString()); |
| } |
| |
| return resourceLocation; |
| } |
| |
| /** |
| * Registers a jar file. Name of the jar file can be an absolute or |
| * relative path. |
| * |
| * If multiple resources are found with the specified name, the |
| * first one is registered as returned by getSystemResources. |
| * A debug-level message is logged to inform the user. |
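| * <p>For example (the jar path is illustrative): |
| * <pre> |
| * pigServer.registerJar("/path/to/myudfs.jar"); |
| * </pre> |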
| * |
| * @param name path of the jar file to register |
| * @throws IOException |
| */ |
| public void registerJar(String name) throws IOException { |
| // Check if this operation is permitted |
| filter.validate(PigCommandFilter.Command.REGISTER); |
| |
| if (pigContext.hasJar(name)) { |
| log.debug("Ignoring duplicate registration for jar " + name); |
| return; |
| } |
| |
| // first try to locate jar via system resources |
| // if this fails, try by using "name" as File (this preserves |
| // compatibility with case when user passes absolute path or path |
| // relative to current working directory.) |
| if (name != null) { |
| if (name.isEmpty()) { |
| log.warn("Empty string specified for jar path"); |
| return; |
| } |
| |
| URL resource = locateJarFromResources(name); |
| |
| if (resource == null) { |
| FetchFileRet[] files = FileLocalizer.fetchFiles(pigContext.getProperties(), name); |
| for (FetchFileRet file : files) { |
| File f = file.file; |
| if (!f.canRead()) { |
| int errCode = 4002; |
| String msg = "Can't read jar file: " + name; |
| throw new FrontendException(msg, errCode, PigException.USER_ENVIRONMENT); |
| } |
| |
| pigContext.addJar(f.toURI().toURL(), name); |
| } |
| } else { |
| pigContext.addJar(resource, name); |
| } |
| } |
| } |
| |
| /** |
| * Universal Scripting Language Support, see PIG-928 |
| * |
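| * <p>A minimal sketch registering Jython UDFs (file name and namespace are |
| * illustrative): |
| * <pre> |
| * pigServer.registerCode("myfuncs.py", "jython", "myfuncs"); |
| * </pre> |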
| * @param path path of the script file |
| * @param scriptingLang language keyword or scriptingEngine used to interpret the script |
| * @param namespace namespace defined for functions of this script |
| * @throws IOException |
| */ |
| public void registerCode(String path, String scriptingLang, String namespace) |
| throws IOException { |
| if (pigContext.scriptingUDFs.containsKey(path) && |
| pigContext.scriptingUDFs.get(path).equals(namespace)) { |
| log.debug("Ignoring duplicate registration for scripting udf file " + path + " in namespace " + namespace); |
| return; |
| } else { |
| pigContext.scriptingUDFs.put(path, namespace); |
| } |
| |
| FetchFileRet ret = FileLocalizer.fetchFile(pigContext.getProperties(), path); |
| File f = ret.file; |
| if (!f.canRead()) { |
| int errCode = 4002; |
| String msg = "Can't read file: " + path; |
| throw new FrontendException(msg, errCode, |
| PigException.USER_ENVIRONMENT); |
| } |
| String cwd = new File(".").getCanonicalPath(); |
| String filePath = f.getCanonicalPath(); |
| String nameInJar = filePath; |
| // Use the relative path in the jar, if the path specified is relative |
| if (!ret.didFetch) { |
| if (!new File(path).isAbsolute() && path.indexOf("." + File.separator) == -1) { |
| // In case of Oozie, the localized files are in a different |
| // directory symlinked to the current directory. Canonical path will not point to cwd. |
| nameInJar = path; |
| } else if (filePath.equals(cwd + File.separator + path)) { |
| // If user specified absolute path and it refers to cwd |
| nameInJar = filePath.substring(cwd.length() + 1); |
| } |
| } |
| |
| pigContext.addScriptFile(nameInJar, filePath); |
| if(scriptingLang != null) { |
| ScriptEngine se = ScriptEngine.getInstance(scriptingLang); |
| se.registerFunctions(nameInJar, namespace, pigContext); |
| } |
| } |
| |
| /** |
| * Register a query with the Pig runtime. The query is parsed and registered, but it is not |
| * executed until it is needed. |
| * |
| * @param query |
| * a Pig Latin expression to be evaluated. |
| * @param startLine |
| * line number of the query within the whole script |
| * @throws IOException |
| */ |
| public void registerQuery(String query, int startLine) throws IOException { |
| currDAG.registerQuery(query, startLine, validateEachStatement, skipParseInRegisterForBatch); |
| } |
| |
| /** |
| * Register a query with the Pig runtime. The query is parsed and registered, but it is not |
| * executed until it is needed. Equivalent to calling {@link #registerQuery(String, int)} |
| * with startLine set to 1. |
| * |
| * @param query |
| * a Pig Latin expression to be evaluated. |
| * @throws IOException |
| */ |
| public void registerQuery(String query) throws IOException { |
| registerQuery(query, 1); |
| } |
| |
| /** |
| * Register a pig script from an InputStream source, which is more general and extensible: |
| * if the pig script is in a local file, you can use a FileInputStream; |
| * if the pig script is built dynamically in memory, you can use a ByteArrayInputStream; |
| * if the pig script is on a remote machine, you can wrap the connection as a SocketInputStream. |
| * @param in InputStream to read the pig script from |
| * @throws IOException |
| */ |
| public void registerScript(InputStream in) throws IOException{ |
| registerScript(in, null, null); |
| } |
| |
| /** |
| * Register a pig script from an InputStream source, which is more general and extensible: |
| * if the pig script is in a local file, you can use a FileInputStream; |
| * if the pig script is built dynamically in memory, you can use a ByteArrayInputStream; |
| * if the pig script is on a remote machine, you can wrap the connection as a SocketInputStream. |
| * The parameters in the pig script will be substituted with the values in params. |
| * @param in InputStream to read the pig script from |
| * @param params the key is the parameter name, and the value is the parameter value |
| * @throws IOException |
| */ |
| public void registerScript(InputStream in, Map<String,String> params) throws IOException{ |
| registerScript(in, params, null); |
| } |
| |
| /** |
| * Register a pig script from an InputStream source, which is more general and extensible: |
| * if the pig script is in a local file, you can use a FileInputStream; |
| * if the pig script is built dynamically in memory, you can use a ByteArrayInputStream; |
| * if the pig script is on a remote machine, you can wrap the connection as a SocketInputStream. |
| * The parameters in the pig script will be substituted with the values in the parameter files. |
| * @param in InputStream to read the pig script from |
| * @param paramsFiles files which have the parameter setting |
| * @throws IOException |
| */ |
| public void registerScript(InputStream in, List<String> paramsFiles) throws IOException { |
| registerScript(in, null, paramsFiles); |
| } |
| |
| /** |
| * Register a pig script from an InputStream.<br> |
| * If the pig script is in a local file, you can use a FileInputStream. |
| * If the pig script is built dynamically in memory, you can use a ByteArrayInputStream. |
| * The pig script can even be on a remote machine, wrapped as a SocketInputStream.<br> |
| * The parameters in the pig script will be substituted with the values in the map and the parameter files. |
| * The values in the params map will override values in the parameter files if they define the same parameter. |
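| * <p>A minimal sketch of registering an in-memory script (script text and |
| * parameter name are illustrative): |
| * <pre> |
| * String script = "A = load '$input'; store A into 'out';"; |
| * Map&lt;String, String&gt; params = new HashMap&lt;String, String&gt;(); |
| * params.put("input", "data.txt"); |
| * pigServer.setBatchOn(); |
| * pigServer.registerScript( |
| *     new java.io.ByteArrayInputStream(script.getBytes()), params, null); |
| * pigServer.executeBatch(); |
| * </pre> |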
| * @param in InputStream to read the pig script from |
| * @param params the key is the parameter name, and the value is the parameter value |
| * @param paramsFiles files which have the parameter setting |
| * @throws IOException |
| */ |
| public void registerScript(InputStream in, Map<String,String> params,List<String> paramsFiles) throws IOException { |
| try { |
| String substituted = pigContext.doParamSubstitution(in, paramMapToList(params), paramsFiles); |
| GruntParser grunt = new GruntParser(new StringReader(substituted), this); |
| grunt.setInteractive(false); |
| grunt.parseStopOnError(true); |
| } catch (org.apache.pig.tools.pigscript.parser.ParseException e) { |
| log.error(e.getLocalizedMessage()); |
| throw new IOException(e); |
| } |
| } |
| |
| protected List<String> paramMapToList(Map<String, String> params) { |
| List<String> paramList = new ArrayList<String>(); |
| if (params != null) { |
| for (Map.Entry<String, String> entry : params.entrySet()) { |
| paramList.add(entry.getKey() + "=" + entry.getValue()); |
| } |
| } |
| return paramList; |
| } |
| |
| /** |
| * Creates a clone of the current DAG |
| * @return A Graph object which is a clone of the current DAG |
| * @throws IOException |
| */ |
| protected Graph getClonedGraph() throws IOException { |
| Graph graph = currDAG.duplicate(); |
| |
| if (graph == null) { |
| int errCode = 2127; |
| String msg = "Cloning of plan failed."; |
| throw new FrontendException(msg, errCode, PigException.BUG); |
| } |
| return graph; |
| } |
| |
| |
| /** |
| * Register a query with the Pig runtime. The query will be read from the indicated file. |
| * @param fileName file to read query from. |
| * @throws IOException |
| */ |
| public void registerScript(String fileName) throws IOException { |
| registerScript(fileName, null, null); |
| } |
| |
| /** |
| * Register a pig script file. The parameters in the file will be substituted with the values in params. |
| * @param fileName pig script file |
| * @param params the key is the parameter name, and the value is the parameter value |
| * @throws IOException |
| */ |
| public void registerScript(String fileName, Map<String,String> params) throws IOException { |
| registerScript(fileName, params, null); |
| } |
| |
| |
| |
| /** |
| * Register a pig script file. The parameters in the file will be substituted with the values in the parameter files. |
| * @param fileName pig script file |
| * @param paramsFiles files which have the parameter setting |
| * @throws IOException |
| */ |
| public void registerScript(String fileName, List<String> paramsFiles) throws IOException { |
| registerScript(fileName, null, paramsFiles); |
| } |
| |
| /** |
| * Register a pig script file. The parameters in the file will be substituted with the values in the map and the parameter files. |
| * The values in the params map will override values in the parameter files if they define the same parameter. |
| * @param fileName pig script file |
| * @param params the key is the parameter name, and the value is the parameter value |
| * @param paramsFiles files which have the parameter setting |
| * @throws IOException |
| */ |
| public void registerScript(String fileName, Map<String,String> params,List<String> paramsFiles) throws IOException { |
| FileInputStream fis = null; |
| try{ |
| fis = new FileInputStream(fileName); |
| registerScript(fis, params, paramsFiles); |
| }catch (FileNotFoundException e){ |
| log.error(e.getLocalizedMessage()); |
| throw new IOException(e); |
| } finally { |
| if (fis != null) { |
| fis.close(); |
| } |
| } |
| } |
| |
| /** |
| * Intended to be used by unit tests only. |
| * Print a list of all aliases in the current Pig Latin script. Output is written to |
| * System.out. |
| * @throws FrontendException |
| */ |
| public void printAliases () throws FrontendException { |
| System.out.println("aliases: " + currDAG.getAliasOp().keySet()); |
| } |
| |
| /** |
| * Write the schema for an alias to System.out. |
| * @param alias Alias whose schema will be written out |
| * @return Schema of alias dumped |
| * @throws IOException |
| */ |
| public Schema dumpSchema(String alias) throws IOException { |
| try { |
| pigContext.inDumpSchema = true; |
| if ("@".equals(alias)) { |
| alias = getLastRel(); |
| } |
| LogicalRelationalOperator op = getOperatorForAlias( alias ); |
| LogicalSchema schema = op.getSchema(); |
| |
| boolean pretty = "true".equals(pigContext.getProperties() |
| .getProperty(PRETTY_PRINT_SCHEMA_PROPERTY)); |
| |
| if (schema != null) { |
| Schema s = org.apache.pig.newplan.logical.Util.translateSchema(schema); |
| System.out.println(alias + ": " + (pretty ? s.prettyPrint() : s.toString())); |
| return s; |
| } else { |
| System.out.println("Schema for " + alias + " unknown."); |
| return null; |
| } |
| } catch (FrontendException fee) { |
| int errCode = 1001; |
| String msg = "Unable to describe schema for alias " + alias; |
| throw new FrontendException (msg, errCode, PigException.INPUT, false, null, fee); |
| } finally { |
| pigContext.inDumpSchema = false; |
| } |
| } |
| |
| /** |
| * Write the schema for a nestedAlias to System.out. Denoted by |
| * alias::nestedAlias. |
| * |
| * @param alias Outer alias whose schema contains nestedAlias |
| * @param nestedAlias Alias whose schema will be written out |
| * @return Schema of alias dumped |
| * @throws IOException |
| */ |
| public Schema dumpSchemaNested(String alias, String nestedAlias) throws IOException { |
| try { |
| pigContext.inDumpSchema = true; |
| if ("@".equals(alias)) { |
| alias = getLastRel(); |
| } |
| Operator op = getOperatorForAlias( alias ); |
| if( op instanceof LOForEach ) { |
| LogicalSchema nestedSc = ((LOForEach)op).dumpNestedSchema(alias, nestedAlias); |
| if (nestedSc!=null) { |
| Schema s = org.apache.pig.newplan.logical.Util.translateSchema(nestedSc); |
| System.out.println(alias+ "::" + nestedAlias + ": " + s.toString()); |
| return s; |
| } |
| else { |
| System.out.println("Schema for "+ alias+ "::" + nestedAlias + " unknown."); |
| return null; |
| } |
| } |
| else { |
| int errCode = 1001; |
| String msg = "Unable to describe schema for " + alias + "::" + nestedAlias; |
| throw new FrontendException (msg, errCode, PigException.INPUT, false, null); |
| } |
| } finally { |
| pigContext.inDumpSchema = false; |
| } |
| } |
| |
| /** |
| * Set the name of the job. This name will get translated to mapred.job.name. |
| * @param name name of the job |
| */ |
| public void setJobName(String name) { |
| jobName = PigContext.JOB_NAME_PREFIX + ":" + name; |
| } |
| |
| /** |
| * Set Hadoop job priority. This value will get translated to mapred.job.priority. |
| * @param priority valid values are found in {@link org.apache.hadoop.mapred.JobPriority} |
| */ |
| public void setJobPriority(String priority) { |
| jobPriority = priority; |
| } |
| |
| /** |
| * Executes a Pig Latin script up to and including indicated alias. That is, if a user does: |
| * <pre> |
| * PigServer server = new PigServer(ExecType.LOCAL); |
| * server.registerQuery("A = load 'foo';"); |
| * server.registerQuery("B = filter A by $0 > 0;"); |
| * server.registerQuery("C = order B by $1;"); |
| * </pre> |
| * Then |
| * <pre> |
| * server.openIterator("B"); |
| * </pre> |
| * filtered but unsorted data will be returned. If instead a user does |
| * <pre> |
| * server.openIterator("C"); |
| * </pre> |
| * filtered and sorted data will be returned. |
| * @param id Alias to open iterator for |
| * @return iterator of tuples returned from the script |
| * @throws IOException |
| */ |
| public Iterator<Tuple> openIterator(String id) throws IOException { |
| try { |
| pigContext.getProperties().setProperty( PigContext.JOB_NAME, jobName ); |
| if( jobPriority != null ) { |
| pigContext.getProperties().setProperty( PigContext.JOB_PRIORITY, jobPriority ); |
| } |
| ExecJob job = store(id, FileLocalizer.getTemporaryPath(pigContext) |
| .toString(), Utils.getTmpFileCompressorName(pigContext) |
| + "()"); |
| |
| // invocation of "execute" is synchronous! |
| |
| if (job.getStatus() == JOB_STATUS.COMPLETED) { |
| return job.getResults(); |
| } else if (job.getStatus() == JOB_STATUS.FAILED |
| && job.getException() != null) { |
| // throw the backend exception in the failed case |
| Exception e = job.getException(); |
| int errCode = 1066; |
| String msg = "Unable to open iterator for alias " + id |
| + ". Backend error : " + e.getMessage(); |
| throw new FrontendException(msg, errCode, PigException.INPUT, e); |
| } else { |
| throw new IOException("Job terminated with anomalous status " |
| + job.getStatus().toString()); |
| } |
| } catch (FrontendException e) { |
| throw e; |
| } catch (Exception e) { |
| int errCode = 1066; |
| String msg = "Unable to open iterator for alias " + id; |
| throw new FrontendException(msg, errCode, PigException.INPUT, e); |
| } |
| } |
| |
| /** |
| * Executes a Pig Latin script up to and including indicated alias and stores the resulting |
| * records into a file. That is, if a user does: |
| * <pre> |
| * PigServer server = new PigServer(ExecType.LOCAL); |
| * server.registerQuery("A = load 'foo';"); |
| * server.registerQuery("B = filter A by $0 > 0;"); |
| * server.registerQuery("C = order B by $1;"); |
| * </pre> |
| * Then |
| * <pre> |
| * server.store("B", "bar"); |
| * </pre> |
| * filtered but unsorted data will be stored to the file <tt>bar</tt>. If instead a user does |
| * <pre> |
| * server.store("C", "bar"); |
| * </pre> |
| * filtered and sorted data will be stored to the file <tt>bar</tt>. |
| * Equivalent to calling {@link #store(String, String, String)} with |
| * <tt>org.apache.pig.PigStorage</tt> as the store function. |
| * @param id The alias to store |
| * @param filename The file to store to |
| * @return {@link ExecJob} containing information about this job |
| * @throws IOException |
| */ |
| public ExecJob store(String id, String filename) throws IOException { |
| return store(id, filename, PigStorage.class.getName() + "()"); // PigStorage is the default store function |
| } |
| |
| /** |
| * Executes a Pig Latin script up to and including indicated alias and stores the resulting |
| * records into a file. That is, if a user does: |
| * <pre> |
| * PigServer server = new PigServer(ExecType.LOCAL); |
| * server.registerQuery("A = load 'foo';"); |
| * server.registerQuery("B = filter A by $0 > 0;"); |
| * server.registerQuery("C = order B by $1;"); |
| * </pre> |
| * Then |
| * <pre> |
| * server.store("B", "bar", "mystorefunc"); |
| * </pre> |
| * filtered but unsorted data will be stored to the file <tt>bar</tt> using |
| * <tt>mystorefunc</tt>. If instead a user does |
| * <pre> |
| * server.store("C", "bar", "mystorefunc"); |
| * </pre> |
| * filtered and sorted data will be stored to the file <tt>bar</tt> using |
| * <tt>mystorefunc</tt>. |
| * <p> |
| * @param id The alias to store |
| * @param filename The file to store to |
| * @param func store function to use |
| * @return {@link ExecJob} containing information about this job |
| * @throws IOException |
| */ |
| public ExecJob store(String id, String filename, String func) |
| throws IOException { |
| PigStats stats = storeEx(id, filename, func); |
| if (stats.getOutputStats().size() < 1) { |
| throw new IOException("Couldn't retrieve job."); |
| } |
| OutputStats output = stats.getOutputStats().get(0); |
| |
| if(stats.isSuccessful()){ |
| return new HJob(JOB_STATUS.COMPLETED, pigContext, output |
| .getPOStore(), output.getAlias(), stats); |
| }else{ |
| HJob job = new HJob(JOB_STATUS.FAILED, pigContext, |
| output.getPOStore(), output.getAlias(), stats); |
| |
| //check for exception |
| Exception ex = null; |
| for(JobStats js : stats.getJobGraph()){ |
| if(js.getException() != null) { |
| ex = js.getException(); |
| } |
| } |
| job.setException(ex); |
| return job; |
| } |
| } |
| |
| private PigStats storeEx(String alias, String filename, String func) |
| throws IOException { |
| if ("@".equals(alias)) { |
| alias = getLastRel(); |
| } |
| currDAG.parseQuery(); |
| currDAG.skipStores(); // skip the stores that have already been processed |
| currDAG.buildPlan( alias ); |
| |
| try { |
| QueryParserUtils.attachStorePlan(scope, currDAG.lp, filename, func, currDAG.getOperator( alias ), alias, pigContext); |
| currDAG.compile(); |
| return executeCompiledLogicalPlan(); |
| } catch (PigException e) { |
| int errCode = 1002; |
| String msg = "Unable to store alias " + alias; |
| throw new PigException(msg, errCode, PigException.INPUT, e); |
| } |
| } |
| |
| /** |
| * Provide information on how a pig query will be executed. For now |
| * this information is very developer focused, and probably not very |
| * useful to the average user. |
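| * <p>For example, to print the plan for alias C (illustrative) to stdout: |
| * <pre> |
| * pigServer.explain("C", System.out); |
| * </pre> |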
| * @param alias Name of alias to explain. |
| * @param stream PrintStream to write explanation to. |
| * @throws IOException if the requested alias cannot be found. |
| */ |
| public void explain(String alias, |
| PrintStream stream) throws IOException { |
| explain(alias, "text", true, false, stream, stream, null, null); |
| } |
| |
| /** |
| * Provide information on how a pig query will be executed. |
| * @param alias Name of alias to explain. |
| * @param format Format in which the explain should be printed. If text, then the plan will |
| * be printed in plain text. Otherwise, the execution plan will be printed in |
| * <a href="http://en.wikipedia.org/wiki/DOT_language">DOT</a> format. |
| * @param verbose Controls the amount of information printed |
| * @param markAsExecute When set will treat the explain like a |
| * call to execute in the respect that all the pending stores are |
| * marked as complete. |
| * @param lps Stream to print the logical tree |
| * @param eps Stream to print the ExecutionEngine trees. If null, then will print to files |
| * @param dir Directory to print ExecutionEngine trees. If null, will use eps |
| * @param suffix Suffix of file names |
| * @throws IOException if the requested alias cannot be found. |
| */ |
| public void explain(String alias, |
| String format, |
| boolean verbose, |
| boolean markAsExecute, |
| PrintStream lps, |
| PrintStream eps, |
| File dir, |
| String suffix) throws IOException { |
| try { |
| pigContext.inExplain = true; |
| buildStorePlan( alias ); |
| currDAG.lp.optimize(pigContext); |
| |
| //Only add root xml node if all plans are being written to same stream. |
| if (format == "xml" && lps == eps) { |
| lps.println("<plan>"); |
| } |
| |
| currDAG.lp.explain(lps, format, verbose); |
| |
| if( currDAG.lp.size() == 0 ) { |
| if (format == "xml" && lps == eps) { |
| lps.println("</plan>"); |
| } |
| return; |
| } |
| |
| pigContext.getExecutionEngine().explain(currDAG.lp, pigContext, eps, format, verbose, dir, suffix ); |
| |
| if (format.equals("xml") && lps == eps) { |
| lps.println("</plan>"); |
| } |
| |
| if (markAsExecute) { |
| currDAG.markAsExecuted(); |
| } |
| } catch (Exception e) { |
| int errCode = 1067; |
| String msg = "Unable to explain alias " + alias; |
| throw new FrontendException(msg, errCode, PigException.INPUT, e); |
| } finally { |
| pigContext.inExplain = false; |
| } |
| } |
| |
| /** |
| * Returns the unused byte capacity of an HDFS filesystem. This value does |
| * not take into account a replication factor, as that can vary from file |
| * to file. Thus if you are using this to determine whether your data set will fit |
| * in HDFS, you need to divide the result of this call by your specific replication |
| * setting. |
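| * <p>For example, with a replication factor of 3 (an illustrative value): |
| * <pre> |
| * long usableBytes = pigServer.capacity() / 3; |
| * </pre> |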
| * @return unused byte capacity of the file system. |
| * @throws IOException |
| */ |
| public long capacity() throws IOException { |
| if (pigContext.getExecType().isLocal()) { |
| throw new IOException("capacity only supported for non-local execution"); |
| } |
| else { |
| DataStorage dds = pigContext.getDfs(); |
| |
| Map<String, Object> stats = dds.getStatistics(); |
| |
| String rawCapacityStr = (String) stats.get(DataStorage.RAW_CAPACITY_KEY); |
| String rawUsedStr = (String) stats.get(DataStorage.RAW_USED_KEY); |
| |
| if ((rawCapacityStr == null) || (rawUsedStr == null)) { |
| throw new IOException("Failed to retrieve capacity stats"); |
| } |
| |
| long rawCapacityBytes = Long.parseLong(rawCapacityStr); |
| long rawUsedBytes = Long.parseLong(rawUsedStr); |
| |
| return rawCapacityBytes - rawUsedBytes; |
| } |
| } |
| |
| /** |
| * Returns the total size in bytes of a file in HDFS, accounting for replication (length multiplied by the replication factor). |
| * @param filename |
| * @return length of the file in bytes |
| * @throws IOException |
| */ |
| public long fileSize(String filename) throws IOException { |
| DataStorage dfs = pigContext.getDfs(); |
| ElementDescriptor elem = dfs.asElement(filename); |
| Map<String, Object> stats = elem.getStatistics(); |
| long length = (Long) stats.get(ElementDescriptor.LENGTH_KEY); |
| int replication = (Short) stats |
| .get(ElementDescriptor.BLOCK_REPLICATION_KEY); |
| |
| return length * replication; |
| } |
| |
| /** |
| * Test whether a file exists. |
| * @param filename to test |
| * @return true if file exists, false otherwise |
| * @throws IOException |
| */ |
| public boolean existsFile(String filename) throws IOException { |
| ElementDescriptor elem = pigContext.getDfs().asElement(filename); |
| return elem.exists(); |
| } |
| |
| /** |
| * Delete a file. |
| * @param filename to delete |
| * @return true |
| * @throws IOException |
| */ |
| public boolean deleteFile(String filename) throws IOException { |
| // Check if this operation is permitted |
| filter.validate(PigCommandFilter.Command.RM); |
| filter.validate(PigCommandFilter.Command.RMF); |
| |
| ElementDescriptor elem = pigContext.getDfs().asElement(filename); |
| elem.delete(); |
| return true; |
| } |
| |
| /** |
| * Rename a file. |
| * @param source file to rename |
| * @param target new file name |
| * @return true |
| * @throws IOException |
| */ |
| public boolean renameFile(String source, String target) throws IOException { |
| // Check if this operation is permitted |
| filter.validate(PigCommandFilter.Command.MV); |
| |
| pigContext.rename(source, target); |
| return true; |
| } |
| |
| /** |
| * Make a directory. |
| * @param dirs directory to make |
| * @return true |
| * @throws IOException |
| */ |
| public boolean mkdirs(String dirs) throws IOException { |
| // Check if this operation is permitted |
| filter.validate(PigCommandFilter.Command.MKDIR); |
| |
| ContainerDescriptor container = pigContext.getDfs().asContainer(dirs); |
| container.create(); |
| return true; |
| } |
| |
| /** |
| * List the contents of a directory. |
| * @param dir name of directory to list |
| * @return array of strings, one for each file name |
| * @throws IOException |
| */ |
| public String[] listPaths(String dir) throws IOException { |
| // Check if this operation is permitted |
| filter.validate(PigCommandFilter.Command.LS); |
| |
| Collection<String> allPaths = new ArrayList<String>(); |
| ContainerDescriptor container = pigContext.getDfs().asContainer(dir); |
| Iterator<ElementDescriptor> iter = container.iterator(); |
| |
| while (iter.hasNext()) { |
| ElementDescriptor elem = iter.next(); |
| allPaths.add(elem.toString()); |
| } |
| |
| String[] type = new String[1]; |
| return allPaths.toArray(type); |
| } |
| |
| /** |
| * Return a map containing the logical plan associated with each alias. |
| * |
| * @return map |
| */ |
| public Map<String, LogicalPlan> getAliases() { |
| Map<String, LogicalPlan> aliasPlans = new HashMap<String, LogicalPlan>(); |
| for (LogicalRelationalOperator op : currDAG.getAliases().keySet()) { |
| String alias = op.getAlias(); |
| if(null != alias) { |
| aliasPlans.put(alias, currDAG.getAliases().get(op)); |
| } |
| } |
| return aliasPlans; |
| } |
| |
| /** |
| * Reclaims resources used by this instance of PigServer. This method |
| * deletes all temporary files generated by the current thread while |
| * executing Pig commands. |
| */ |
| public void shutdown() { |
| // clean-up activities |
| // TODO: reclaim scope to free up resources. Currently |
| // this is not implemented and throws an exception |
| // hence, for now, we won't call it. |
| // |
| // pigContext.getExecutionEngine().reclaimScope(this.scope); |
| |
| FileLocalizer.deleteTempFiles(); |
| } |
| |
| /** |
| * Get the set of all current aliases. |
| * @return set |
| */ |
| public Set<String> getAliasKeySet() { |
| return currDAG.getAliasOp().keySet(); |
| } |
| |
| public Map<Operator, DataBag> getExamples(String alias) throws IOException { |
| try { |
| if (currDAG.isBatchOn() && alias != null) { |
| currDAG.parseQuery(); |
| currDAG.buildPlan( null ); |
| execute(); |
| } |
| currDAG.parseQuery(); |
| currDAG.skipStores(); |
| currDAG.buildPlan( alias ); |
| currDAG.compile(); |
| } catch (IOException e) { |
| //Since the original script is parsed anyway, there should not be an |
| //error in this parsing. The only reason there can be an error is when |
| //the files being loaded in load don't exist anymore. |
| e.printStackTrace(); |
| } |
| |
| ExampleGenerator exgen = new ExampleGenerator( currDAG.lp, pigContext ); |
| try { |
| return exgen.getExamples(); |
| } catch (ExecException e) { |
| e.printStackTrace(System.out); |
| throw new IOException("ExecException" , e); |
| } catch (Exception e) { |
| e.printStackTrace(System.out); |
| throw new IOException("Exception ", e); |
| } |
| |
| } |
| |
| public void printHistory(boolean withNumbers) { |
| |
| List<String> sc = currDAG.getScriptCache(); |
| |
| if(!sc.isEmpty()) { |
| for(int i = 0 ; i < sc.size(); i++) { |
| if(withNumbers) System.out.print((i+1)+" "); |
| System.out.println(sc.get(i)); |
| } |
| } |
| |
| } |
| |
| private void buildStorePlan(String alias) throws IOException { |
| currDAG.parseQuery(); |
| currDAG.buildPlan( alias ); |
| |
| if( !isBatchOn() || alias != null ) { |
| // MRCompiler needs a store to be the leaf - hence |
| // add a store to the plan to explain |
| QueryParserUtils.attachStorePlan(scope, currDAG.lp, "fakefile", null, currDAG.getOperator( alias ), |
| "fake", pigContext ); |
| } |
| currDAG.compile(); |
| } |
| |
| /** |
| * Compile and execute the current plan. |
| * @return the PigStats for the executed plan |
| * @throws IOException |
| */ |
| private PigStats execute() throws IOException { |
| pigContext.getProperties().setProperty( PigContext.JOB_NAME, jobName ); |
| if( jobPriority != null ) { |
| pigContext.getProperties().setProperty( PigContext.JOB_PRIORITY, jobPriority ); |
| } |
| |
| // All stores in the current plan will be executed. They should be ignored if the plan is reused. |
| currDAG.countExecutedStores(); |
| |
| currDAG.compile(); |
| |
| if( currDAG.lp.size() == 0 ) { |
| return PigStats.get(); |
| } |
| |
| pigContext.getProperties().setProperty("pig.logical.plan.signature", currDAG.lp.getSignature()); |
| |
| PigStats stats = executeCompiledLogicalPlan(); |
| |
| return stats; |
| } |
| |
| private PigStats executeCompiledLogicalPlan() throws ExecException, |
| FrontendException { |
| // discover pig features used in this script |
| ScriptState.get().setScriptFeatures(currDAG.lp); |
| currDAG.lp.optimize(pigContext); |
| |
| return launchPlan(currDAG.lp, "job_pigexec_"); |
| } |
| |
| /** |
| * A common method for launching the jobs according to the logical plan |
| * @param lp The logical plan |
| * @param jobName A String containing the job name to be used |
| * @return The PigStats object |
| * @throws ExecException |
| * @throws FrontendException |
| */ |
| protected PigStats launchPlan(LogicalPlan lp, String jobName) throws ExecException, FrontendException { |
| |
| PigStats stats = null; |
| try { |
| stats = pigContext.getExecutionEngine().launchPig(lp, jobName, pigContext); |
| } catch (ExecException e) { |
| throw e; |
| } catch (FrontendException e) { |
| throw e; |
| } catch (Exception e) { |
| // There are a lot of exceptions thrown by the launcher. If this |
| // is an ExecException or FrontendException, just let it through. Else wrap it. |
| int errCode = 2043; |
| String msg = "Unexpected error during execution."; |
| throw new ExecException(msg, errCode, PigException.BUG, e); |
| } |
| |
| return stats; |
| } |
| |
| /** |
| * NOTE: For testing only. Don't use. |
| * @throws IOException |
| */ |
| @SuppressWarnings("unused") |
| private LogicalPlan buildLp() throws IOException { |
| currDAG.buildPlan( null); |
| currDAG.compile(); |
| return currDAG.lp; |
| } |
| |
| private LogicalRelationalOperator getOperatorForAlias(String alias) throws IOException { |
| buildStorePlan (alias); |
| LogicalRelationalOperator op = (LogicalRelationalOperator)currDAG.getOperator( alias ); |
| if( op == null ) { |
| int errCode = 1005; |
| String msg = "No plan for " + alias + " to describe"; |
| throw new FrontendException(msg, errCode, PigException.INPUT, false, null); |
| } |
| return op; |
| } |
| |
| /** |
| * Returns data associated with LogicalPlan. It makes |
| * sense to call this method only after a query/script |
| * has been registered with one of the {@link #registerQuery(String)} |
| * or {@link #registerScript(InputStream)} methods. |
| * |
| * @return LogicalPlanData |
| */ |
| public LogicalPlanData getLogicalPlanData() { |
| return new LogicalPlanData(currDAG.lp); |
| } |
| |
| /* |
| * This class holds the internal state of a grunt shell session. |
| */ |
| protected class Graph { |
| |
| private final Map<LogicalRelationalOperator, LogicalPlan> aliases = new HashMap<LogicalRelationalOperator, LogicalPlan>(); |
| |
| private Map<String, Operator> operators = new HashMap<String, Operator>(); |
| private String lastRel; |
| |
| private final List<String> scriptCache = new ArrayList<String>(); |
| |
| // the fileNameMap contains filename to canonical filename |
| // mappings. This is done so we can reparse the cached script |
| // and remember the translation (the current directory might only |
| // be correct during the first parse). |
| private Map<String, String> fileNameMap = new HashMap<String, String>(); |
| |
| private final boolean batchMode; |
| |
| private int processedStores = 0; |
| |
| private LogicalPlan lp; |
| |
| private int currentLineNum = 0; |
| |
| public Graph(boolean batchMode) { |
| this.batchMode = batchMode; |
| this.lp = new LogicalPlan(); |
| } |
| |
| /** |
| * Call back method for counting executed stores. |
| */ |
| private void countExecutedStores() throws FrontendException { |
| List<LOStore> sinks = Util.getLogicalRelationalOperators(lp, LOStore.class); |
| processedStores += sinks.size(); |
| } |
| |
| Map<LogicalRelationalOperator, LogicalPlan> getAliases() { |
| return aliases; |
| } |
| |
| Map<String, Operator> getAliasOp() { |
| return operators; |
| } |
| |
| boolean isBatchOn() { |
| return batchMode; |
| } |
| |
| boolean isBatchEmpty() { |
| for( Operator op : lp.getSinks() ) { |
| if( op instanceof LOStore ) |
| return false; |
| } |
| return true; |
| } |
| |
| void markAsExecuted() { |
| } |
| |
| public LogicalPlan getLogicalPlan() { |
| return this.lp; |
| } |
| |
| /** |
| * Get the operator with the given alias in the raw plan. Null if not |
| * found. |
| */ |
| Operator getOperator(String alias) throws FrontendException { |
| return operators.get( alias ); |
| } |
| |
| public LogicalPlan getPlan(String alias) throws IOException { |
| LogicalPlan plan = lp; |
| |
| if (alias != null) { |
| LogicalRelationalOperator op = (LogicalRelationalOperator) operators.get(alias); |
| if(op == null) { |
| int errCode = 1003; |
| String msg = "Unable to find an operator for alias " + alias; |
| throw new FrontendException(msg, errCode, PigException.INPUT); |
| } |
| plan = aliases.get(op); |
| } |
| return plan; |
| } |
| |
| |
| /** |
| * Build a plan for the given alias. Extra branches and child branches under the alias |
| * will be ignored. Dependent branches (i.e. scalars) will be kept. |
| * @throws IOException |
| */ |
| void buildPlan(String alias) throws IOException { |
| if( alias == null ) |
| skipStores(); |
| |
| final Queue<Operator> queue = new LinkedList<Operator>(); |
| if( alias != null ) { |
| Operator op = getOperator( alias ); |
| if (op == null) { |
| String msg = "Unable to find an operator for alias " + alias; |
| throw new FrontendException( msg, 1003, PigException.INPUT ); |
| } |
| queue.add( op ); |
| } else { |
| List<LOStore> stores = Util.getLogicalRelationalOperators(lp, LOStore.class); |
| for (LOStore op : stores) { |
| boolean addSink = true; |
| // Only add if all the successors are loads |
| List<Operator> succs = lp.getSuccessors(op); |
| if (succs != null && succs.size() > 0) { |
| for (Operator succ : succs) { |
| if (!(succ instanceof LOLoad)) { |
| addSink = false; |
| break; |
| } |
| } |
| } |
| if (addSink) { |
| queue.add(op); |
| } |
| } |
| } |
| |
| LogicalPlan plan = new LogicalPlan(); |
| |
| while( !queue.isEmpty() ) { |
| Operator currOp = queue.poll(); |
| plan.add( currOp ); |
| |
| List<Operator> preds = lp.getPredecessors( currOp ); |
| if( preds != null ) { |
| List<Operator> ops = new ArrayList<Operator>( preds ); |
| for( Operator pred : ops ) { |
| if( !queue.contains( pred ) ) |
| queue.add( pred ); |
| plan.connect( pred, currOp ); |
| } |
| } |
| |
| // visit expression associated with currOp. If it refers to any other operator |
| // that operator is also going to be enqueued. |
| currOp.accept( new AllExpressionVisitor( plan, new DependencyOrderWalker( plan ) ) { |
| @Override |
| protected LogicalExpressionVisitor getVisitor(LogicalExpressionPlan exprPlan) |
| throws FrontendException { |
| return new LogicalExpressionVisitor( exprPlan, new DependencyOrderWalker( exprPlan ) ) { |
| @Override |
| public void visit(ScalarExpression expr) throws FrontendException { |
| Operator refOp = expr.getImplicitReferencedOperator(); |
| if( !queue.contains( refOp ) ) |
| queue.add( refOp ); |
| } |
| }; |
| } |
| } |
| ); |
| |
| currOp.setPlan( plan ); |
| } |
| lp = plan; |
| } |
| |
| /** |
| * Remove stores that have been executed previously from the overall plan. |
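| * For example (illustrative): if processedStores is 2 because two |
| * stores were already run in an earlier batch execution, the first |
| * two store sinks found below are detached before the plan is re-run. |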
| */ |
| private void skipStores() throws IOException { |
| // Get stores specifically |
| List<LOStore> sinks = Util.getLogicalRelationalOperators(lp, LOStore.class); |
| List<Operator> sinksToRemove = new ArrayList<Operator>(); |
| int skipCount = processedStores; |
| if( skipCount > 0 ) { |
| for( LOStore sink : sinks ) { |
| sinksToRemove.add( sink ); |
| skipCount--; |
| if( skipCount == 0 ) |
| break; |
| } |
| } |
| |
| for( Operator op : sinksToRemove ) { |
| // It's entirely possible in the multiquery case that |
| // a store is not a leaf (sink) and therefore has |
| // successors that need to be removed. |
| removeToLoad(op); |
| Operator pred = lp.getPredecessors( op ).get(0); |
| lp.disconnect( pred, op ); |
| lp.remove( op ); |
| } |
| } |
| |
| private void removeToLoad(Operator toRemove) throws IOException { |
| List<Operator> successors = lp.getSuccessors(toRemove); |
| List<Operator> succToRemove = new ArrayList<Operator>(); |
| if (successors != null && successors.size() > 0) { |
| succToRemove.addAll(successors); |
| for (Operator succ : succToRemove) { |
| lp.disconnect( toRemove, succ ); |
| if (!(succ instanceof LOLoad)) { |
| removeToLoad(succ); |
| lp.remove(succ); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Accumulate the given statement onto the previously registered query |
| * statements and generate an overall (raw) plan. |
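| * For example (a hypothetical interactive sequence; aliases and paths |
| * are illustrative), outside batch mode a STORE statement triggers |
| * immediate execution: |
| * <pre>{@code |
| * currDAG.registerQuery("a = LOAD 'in';", 1, true, false); |
| * currDAG.registerQuery("STORE a INTO 'out';", 2, true, false); |
| * }</pre> |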
| */ |
| void registerQuery(String query, int startLine, boolean validateEachStatement, |
| boolean skipParseForBatch) throws IOException { |
| if( batchMode ) { |
| if( startLine == currentLineNum ) { |
| String line = scriptCache.remove( scriptCache.size() - 1 ); |
| scriptCache.add( line + query ); |
| } else { |
| while( startLine > currentLineNum + 1 ) { |
| scriptCache.add( "" ); |
| currentLineNum++; |
| } |
| BufferedReader br = new BufferedReader(new StringReader(query)); |
| String line = br.readLine(); |
| while (line != null) { |
| scriptCache.add(line); |
| currentLineNum++; |
| line = br.readLine(); |
| } |
| } |
| if (skipParseForBatch) { |
| return; |
| } |
| } else { |
| scriptCache.add( query ); |
| } |
| |
| if(validateEachStatement){ |
| validateQuery(); |
| } |
| parseQuery(); |
| |
| if( !batchMode ) { |
| buildPlan( null ); |
| for( Operator sink : lp.getSinks() ) { |
| if( sink instanceof LOStore ) { |
| try { |
| execute(); |
| } catch (Exception e) { |
| int errCode = 1002; |
| String msg = "Unable to store alias " |
| + ((LOStore) sink).getAlias(); |
| throw new FrontendException(msg, errCode, |
| PigException.INPUT, e); |
| } |
| break; // We should have at most one store, so break here. |
| } |
| } |
| } |
| } |
| |
| private void validateQuery() throws FrontendException { |
| String query = buildQuery(); |
| QueryParserDriver parserDriver = new QueryParserDriver( pigContext, scope, fileNameMap ); |
| try { |
| LogicalPlan plan = parserDriver.parse( query ); |
| plan.validate(pigContext, scope, true); |
| } catch(FrontendException ex) { |
| scriptCache.remove( scriptCache.size() - 1 ); |
| throw ex; |
| } |
| } |
| |
| public List<String> getScriptCache() { |
| return scriptCache; |
| } |
| |
| /** |
| * Parse the accumulated pig statements and generate an overall plan. |
| */ |
| private void parseQuery() throws FrontendException { |
| UDFContext.getUDFContext().reset(); |
| UDFContext.getUDFContext().setClientSystemProps(pigContext.getProperties()); |
| |
| String query = buildQuery(); |
| |
| if( query.isEmpty() ) { |
| lp = new LogicalPlan(); |
| return; |
| } |
| |
| try { |
| QueryParserDriver parserDriver = new QueryParserDriver( pigContext, scope, fileNameMap ); |
| lp = parserDriver.parse( query ); |
| operators = parserDriver.getOperators(); |
| lastRel = parserDriver.getLastRel(); |
| } catch(Exception ex) { |
| scriptCache.remove( scriptCache.size() - 1 ); // remove the bad script from the cache. |
| PigException pe = LogUtils.getPigException(ex); |
| int errCode = 1000; |
| String msg = "Error during parsing. " |
| + (pe == null ? ex.getMessage() : pe.getMessage()); |
| log.error("exception during parsing: " + msg, ex); |
| if (null == pe) { |
| throw new FrontendException(msg, errCode, PigException.INPUT, ex); |
| } else { |
| throw new FrontendException(msg, errCode, PigException.INPUT, ex, pe.getSourceLocation()); |
| } |
| } |
| } |
| |
| public String getLastRel() { |
| return lastRel; |
| } |
| |
| private String buildQuery() { |
| StringBuilder accuQuery = new StringBuilder(); |
| for( String line : scriptCache ) { |
| accuQuery.append( line ).append( "\n" ); |
| } |
| |
| return accuQuery.toString(); |
| } |
| |
| private void compile() throws IOException { |
| lp.validate(pigContext, scope, false); |
| currDAG.postProcess(); |
| } |
| |
| private void postProcess() throws IOException { |
| // The following code deals with store/load combinations over |
| // intermediate files. In this case we will replace the load |
| // operator with an (implicit) split operator, iff the load/store |
| // func is reversible (because that's when we can safely |
| // skip the load and keep going with the split output). If |
| // the load/store func is not reversible (or they are |
| // different functions), we connect the store and the load |
| // to remember the dependency. |
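| // For example (illustrative): |
| // STORE b INTO 'tmp'; |
| // c = LOAD 'tmp'; |
| // the store into 'tmp' is connected to the load of 'tmp' below so |
| // that the load cannot be scheduled before the store completes. |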
| |
| Set<LOLoad> loadOps = new HashSet<LOLoad>(); |
| List<Operator> sources = lp.getSources(); |
| for (Operator source : sources) { |
| if (source instanceof LOLoad) { |
| loadOps.add((LOLoad)source); |
| } |
| } |
| |
| Set<LOStore> storeOps = new HashSet<LOStore>(); |
| List<Operator> sinks = lp.getSinks(); |
| for (Operator sink : sinks) { |
| if (sink instanceof LOStore) { |
| storeOps.add((LOStore)sink); |
| } |
| } |
| |
| if ("true".equals(pigContext.getProperties().getProperty(PIG_LOCATION_CHECK_STRICT))) { |
| log.info("Output location strick check enabled"); |
| checkDuplicateStoreLoc(storeOps); |
| } |
| |
| for (LOLoad load : loadOps) { |
| for (LOStore store : storeOps) { |
| String ifile = load.getFileSpec().getFileName(); |
| String ofile = store.getFileSpec().getFileName(); |
| if (ofile.equals(ifile)) { |
| // if there is no path from the load to the store, |
| // then connect the store to the load to create the |
| // dependency of the store on the load. If there is |
| // a path from the load to the store, then we should |
| // not connect the store to the load and create a cycle |
| if (!store.getPlan().pathExists(load, store)) { |
| store.getPlan().connect(store, load); |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * This method checks whether multiple sinks (STOREs) use the same |
| * file-based location. If so, it throws a RuntimeException. |
| * |
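| * For example (an illustrative script), the following would fail when |
| * the strict output-location check property is enabled: |
| * <pre>{@code |
| * STORE a INTO 'out'; |
| * STORE b INTO 'out'; |
| * }</pre> |
| * |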
| * @param storeOps |
| */ |
| private void checkDuplicateStoreLoc(Set<LOStore> storeOps) { |
| Set<String> uniqueStoreLoc = new HashSet<String>(); |
| for(LOStore store : storeOps) { |
| String fileName = store.getFileSpec().getFileName(); |
| if(!uniqueStoreLoc.add(fileName) && UriUtil.isHDFSFileOrLocalOrS3N(fileName, new Configuration(true))) { |
| throw new RuntimeException("Script contains 2 or more STORE statements writing to same location : "+ fileName); |
| } |
| } |
| } |
| |
| protected Graph duplicate() { |
| // There are two choices on how we duplicate the logical plan |
| // 1 - we really clone each operator and connect up the cloned operators |
| // 2 - we cache away the script till the point we need to clone |
| // and then simply re-parse the script. |
| // The latter approach is used here |
| // FIXME: There is one open issue with this now: |
| // Consider the following script: |
| // A = load 'file:/somefile'; |
| // B = filter A by $0 > 10; |
| // store B into 'bla'; |
| // rm 'file:/somefile'; |
| // A = load 'file:/someotherfile' |
| // when we try to clone, we try to reparse |
| // from the beginning, and currently the parser |
| // checks for the existence of the files named in the load |
| // when a file is a local one - i.e. with a file: prefix. |
| // This is a known issue for now and will need to be revisited later. |
| |
| // parse each line of the cached script |
| int lineNumber = 1; |
| |
| // create data structures needed for parsing |
| Graph graph = new Graph(isBatchOn()); |
| graph.processedStores = processedStores; |
| graph.fileNameMap = new HashMap<String, String>(fileNameMap); |
| |
| try { |
| for (Iterator<String> it = scriptCache.iterator(); it.hasNext(); lineNumber++) { |
| // always doing registerQuery irrespective of the batch mode |
| // TODO: Need to figure out if anything different needs to happen if batch |
| // mode is not on |
| // Don't have to do the validation again, so set validateEachStatement param to false |
| graph.registerQuery(it.next(), lineNumber, false, false); |
| } |
| graph.postProcess(); |
| } catch (IOException ioe) { |
| ioe.printStackTrace(); |
| graph = null; |
| } |
| return graph; |
| } |
| } |
| |
| /** |
| * This can be called to indicate if the query is being parsed/compiled |
| * in a mode that expects each statement to be validated as it is |
| * entered, instead of just doing it once for whole script. |
| * @param validateEachStatement |
| */ |
| public void setValidateEachStatement(boolean validateEachStatement) { |
| this.validateEachStatement = validateEachStatement; |
| } |
| |
| /** |
| * Set whether to skip parsing while registering the query in batch mode |
| * @param skipParseInRegisterForBatch |
| */ |
| public void setSkipParseInRegisterForBatch(boolean skipParseInRegisterForBatch) { |
| this.skipParseInRegisterForBatch = skipParseInRegisterForBatch; |
| } |
| |
| public String getLastRel() { |
| return currDAG.getLastRel(); |
| } |
| } |