| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # 'License'); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an 'AS IS' BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| # |
| # The Pig command script |
| # |
| # Environment Variables |
| # |
| # JAVA_HOME The java implementation to use. Required. |
| # |
| # PIG_CLASSPATH Extra Java CLASSPATH entries. |
| # |
| # HADOOP_HOME/HADOOP_PREFIX Hadoop install directory containing bin/hadoop (HADOOP_PREFIX is the name used as of Hadoop 0.20.205) |
| # |
| # HADOOP_CONF_DIR Hadoop conf dir |
| # |
| # PIG_HEAPSIZE The maximum amount of heap to use, in MB. |
| # Default is 1000. |
| # |
| # PIG_OPTS Extra Java runtime options. |
| # |
| # PIG_CONF_DIR Alternate conf dir. Default is ${PIG_HOME}/conf. |
| # |
| # HBASE_CONF_DIR - Optionally, the HBase configuration to run against |
| # when using HBaseStorage |
| |
| |
| import sys |
| import os |
| import glob |
| import subprocess |
| |
def parseLauncherArgs(argv):
    """Split launcher-only switches from the arguments forwarded to Pig.

    argv -- command-line arguments WITHOUT the script name.

    Returns a tuple (debug, includeHCatalog, additionalJars, restArgs):
      debug            True when -secretDebugCmd was given (dry run).
      includeHCatalog  True when -useHCatalog was given.
      additionalJars   value of -D.pig.additional.jars=...; only honoured
                       when the option appears after -useHCatalog, otherwise
                       the option is forwarded untouched and this stays "".
      restArgs         every other argument, in the original order.
    """
    debugFlag = False
    useHCatalog = False
    extraJars = ""
    passThrough = []
    for arg in argv:
        if arg == "-secretDebugCmd":
            debugFlag = True
        elif arg == "-useHCatalog":
            useHCatalog = True
        else:
            # partition() never fails on a missing "=" and keeps any further
            # "=" characters as part of the value.
            optionName, _, optionValue = arg.partition("=")
            if optionName == "-D.pig.additional.jars" and useHCatalog:
                extraJars = optionValue
            else:
                passThrough.append(arg)
    return debugFlag, useHCatalog, extraJars, passThrough


# sys.argv[0] is the script itself; slicing it off is more reliable than
# comparing every argument against __file__.
debug, includeHCatalog, additionalJars, restArgs = parseLauncherArgs(sys.argv[1:])
| |
# Follow symbolic links so the real location of this launcher is used even
# when it was started through a link.
this = os.path.realpath(sys.argv[0])
bindir = os.path.split(this)[0] + os.sep

# The Pig installation root is the parent of the launcher's directory.
os.environ['PIG_HOME'] = os.path.join(bindir, os.pardir)
| |
# Locate the configuration directory unless the caller already provided one.
# Preference order: ${PIG_HOME}/conf (when it holds a readable pig.properties),
# then the system-wide /etc/pig; otherwise give up.
if 'PIG_CONF_DIR' not in os.environ:
    pigPropertiesPath = os.path.join(os.environ['PIG_HOME'], 'conf', 'pig.properties')
    if os.path.exists(pigPropertiesPath):
        try:
            # Opening the file proves it is readable, not merely present.
            with open(pigPropertiesPath, 'r'):
                pass
        except (IOError, OSError):
            # The file vanished or is unreadable in the small window after
            # the existence check: fail loudly.
            sys.exit('Failed to access file %s' % pigPropertiesPath)
        os.environ['PIG_CONF_DIR'] = os.path.join(os.environ['PIG_HOME'], 'conf')

    elif os.path.exists(os.path.join(os.path.sep, 'etc', 'pig')):
        os.environ['PIG_CONF_DIR'] = os.path.join(os.path.sep, 'etc', 'pig')

    else:
        sys.exit('Cannot determine PIG_CONF_DIR. Please set it to the directory containing pig.properties')
| |
def parseEnvDump(lines):
    """Extract NAME=value pairs from the output of the shell builtin `set`.

    lines -- iterable of text lines, with or without line terminators.

    Returns a dict mapping variable names to their unquoted values.  A line
    is ignored when it has no '=', when the text before the first '='
    contains a space (function bodies, continuation lines), when the value
    uses bash's $'...' quoting, when its quotes are unbalanced (a value that
    spans several lines) or when it unquotes to more than one word.
    """
    import shlex

    parsed = {}
    for line in lines:
        envName, separator, rawValue = line.rstrip('\r\n').partition('=')
        if not separator or not envName or ' ' in envName:
            continue
        if rawValue.startswith('$'):
            # bash-only $'...' quoting cannot be decoded by shlex.
            continue
        try:
            # Some shells quote every value; remove that quoting so the real
            # value is imported rather than the quoted text.
            words = shlex.split(rawValue)
        except ValueError:
            continue
        if len(words) > 1:
            continue
        if words:
            parsed[envName] = words[0]
        else:
            parsed[envName] = ''
    return parsed


# Hack: source ${PIG_CONF_DIR}/pig-env.sh in a shell and copy the variables the
# shell reports afterwards into this process's environment.
# SECURITY: this executes whatever pig-env.sh contains, so PIG_CONF_DIR must be
# a trusted location.
# Best effort by design: a missing pig-env.sh or a missing `sh` is ignored.
try:
    pigEnvScript = os.path.join(os.environ['PIG_CONF_DIR'], 'pig-env.sh')
    if os.path.isfile(pigEnvScript):
        # The output is read through a pipe, so nothing has to be written into
        # the (possibly read-only) configuration directory.
        envDump = subprocess.Popen(
            ['sh', '-c', '. "$1" && set', 'sh', pigEnvScript],
            stdout=subprocess.PIPE,
            universal_newlines=True).communicate()[0]
        for envName, envValue in parseEnvDump(envDump.splitlines()).items():
            os.environ[envName] = envValue
except (KeyError, IOError, OSError, ValueError):
    pass
| |
# Optional configuration hook: when ${PIG_CONF_DIR}/pig.conf exists it is
# executed as Python code in a namespace of its own.
# NOTE(review): presumably the counterpart of sourcing a config file in the
# shell launcher -- confirm that pig.conf is expected to contain Python.
# SECURITY: this runs arbitrary code from the configuration directory, so
# PIG_CONF_DIR must be a trusted location.
pigConf = os.path.join(os.environ.get('PIG_CONF_DIR', ''), 'pig.conf')
if os.environ.get('PIG_CONF_DIR') and os.path.isfile(pigConf):
    try:
        with open(pigConf, 'r') as confFile:
            confSource = confFile.read()
        exec(compile(confSource, pigConf, 'exec'),
             {'__name__': 'pig_conf', '__file__': pigConf})
    except Exception:
        # Best effort: a broken pig.conf must not stop the launcher, but the
        # user should learn that it was skipped.
        sys.stderr.write('Warning: ignoring %s: %s\n' % (pigConf, sys.exc_info()[1]))
| |
# A JVM is mandatory; there is no fallback location for it.
javaHome = os.environ.get('JAVA_HOME')
if javaHome is None:
    sys.exit('Error: JAVA_HOME is not set')

# Fall back to /usr when no Hadoop home was supplied.
os.environ.setdefault('HADOOP_HOME', os.path.sep + 'usr')

java = os.path.join(javaHome, 'bin', 'java')

# Heap ceiling handed to the JVM: PIG_HEAPSIZE is in MB and defaults to 1000.
javaHeapMax = '-Xmx%sm' % os.environ.get('PIG_HEAPSIZE', '1000')
| |
# Seed the classpath: configuration directory first, then the JDK tools jar,
# then the optional user and Hadoop configuration entries, and finally every
# jar found directly under ${PIG_HOME}/lib.
classpathEntries = [
    os.environ['PIG_CONF_DIR'],
    os.path.join(os.environ['JAVA_HOME'], 'lib', 'tools.jar'),
]

for optionalVar in ('PIG_CLASSPATH', 'HADOOP_CONF_DIR'):
    if optionalVar in os.environ:
        classpathEntries.append(os.environ[optionalVar])

pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME'], "lib", "*.jar"))
classpathEntries.extend(pigLibJars)

classpath = os.pathsep.join(classpathEntries)
| |
| |
######### if hcatalog is to be included, add hcatalog and its required jars

if includeHCatalog:
    # --- hive jars required by hcatalog ---
    # HIVE_HOME wins; otherwise fall back to the packaged location
    # /usr/lib/hive (absolute, independent of the working directory).
    if 'HIVE_HOME' in os.environ:
        hiveJarLoc = os.path.join(os.environ['HIVE_HOME'], "lib")
    elif os.path.exists(os.path.join(os.path.sep + 'usr', 'lib', 'hive')):
        hiveJarLoc = os.path.join(os.path.sep + 'usr', 'lib', 'hive', 'lib')
    else:
        sys.exit("Please initialize HIVE_HOME to the hive install directory")

    allHiveJars = ["hive-metastore-*.jar", "libthrift-*.jar", "hive-exec-*.jar", "libfb303-*.jar", "jdo*-api-*.jar", "slf4j-api-*.jar", "hive-hbase-handler-*.jar"]
    for jarName in allHiveJars:
        matchingJars = glob.glob(os.path.join(hiveJarLoc, jarName))
        # Every pattern is mandatory; the first match is the one used.
        if matchingJars and os.path.exists(matchingJars[0]):
            classpath += os.pathsep + matchingJars[0]
        else:
            sys.exit("Failed to find the jar %s" % os.path.join(hiveJarLoc, jarName))
    # --- done with the hive jars ---

    # --- hcat jars ---
    if 'HCAT_HOME' in os.environ:
        hcatHome = os.environ['HCAT_HOME']
    elif os.path.exists(os.path.join(os.path.sep + "usr", "lib", "hcatalog")):
        hcatHome = os.path.join(os.path.sep + "usr", "lib", "hcatalog")
    else:
        sys.exit("Please initialize HCAT_HOME to the hcatalog install directory")

    hcatJars = glob.glob(os.path.join(hcatHome, "share", "hcatalog", "*hcatalog-*.jar"))
    # Only the first jar whose path mentions "server" is added; it is mandatory.
    serverJars = [hcatJar for hcatJar in hcatJars if hcatJar.find("server") != -1]
    if not serverJars:
        sys.exit("Failed to find the hcatalog server jar in %s" % (os.path.join(hcatHome, "share", "hcatalog")))
    classpath += os.pathsep + serverJars[0]

    # The HBase storage handler is optional.
    hcatHBaseJar = glob.glob(os.path.join(hcatHome, "lib", "hbase-storage-handler-*.jar"))
    if hcatHBaseJar:
        classpath += os.pathsep + hcatHBaseJar[0]
    # --- done with the hcat jars ---

    # Jars passed on the command line via -D.pig.additional.jars.  Skipped when
    # empty: an empty classpath entry would stand for the current directory.
    if additionalJars:
        classpath += os.pathsep + additionalJars

######### done with adding hcatalog and related jars
| |
| |
######### Add the jython jars to classpath

# Look in the release layout first, then in the ivy build layout.  A location
# only counts when it holds exactly one jython jar; the first such location wins.
for jythonDir in (("lib",), ("build", "ivy", "lib", "Pig")):
    jythonPattern = os.path.join(os.environ['PIG_HOME'], *(jythonDir + ("jython*.jar",)))
    jythonJars = glob.glob(jythonPattern)
    if len(jythonJars) == 1:
        classpath += os.pathsep + jythonJars[0]
        break

######### Done adding the jython jars to classpath
| |
| |
######### Add the jruby jars to classpath

# Same lookup as for jython: release layout first, then the ivy build layout,
# accepting a location only when it holds exactly one jruby-complete jar.
for jrubyDir in (("lib",), ("build", "ivy", "lib", "Pig")):
    jrubyPattern = os.path.join(os.environ['PIG_HOME'], *(jrubyDir + ("jruby-complete-*.jar",)))
    jrubyJars = glob.glob(jrubyPattern)
    if len(jrubyJars) == 1:
        classpath += os.pathsep + jrubyJars[0]
        break

# Every jar under ${PIG_HOME}/share/pig/lib is added as well.
pigJars = glob.glob(os.path.join(os.environ['PIG_HOME'], "share", "pig", "lib", "*.jar"))
classpath = os.pathsep.join([classpath] + pigJars)

######### Done adding jruby jars to classpath
| |
| |
######### Add hadoop and hbase conf directories

extraConfDirs = []

# Hadoop configuration bundled with the Pig installation, when present.
hadoopConfDir = os.path.join(os.environ['PIG_HOME'], "etc", "hadoop")
if os.path.exists(hadoopConfDir):
    extraConfDirs.append(hadoopConfDir)

# HBASE_CONF_DIR is taken as given; without it /etc/hbase is used if it exists.
try:
    extraConfDirs.append(os.environ['HBASE_CONF_DIR'])
except KeyError:
    hbaseConfDir = os.path.join(os.path.sep + "etc", "hbase")
    if os.path.exists(hbaseConfDir):
        extraConfDirs.append(hbaseConfDir)

for confDir in extraConfDirs:
    classpath += os.pathsep + confDir

######### Done adding hadoop and hbase conf directories
| |
| |
######### Locate and add Zookeeper jars if they exist

# ZOOKEEPER_HOME wins; otherwise look under ${PIG_HOME}/share/zookeeper.
if 'ZOOKEEPER_HOME' in os.environ:
    zkDir = os.environ['ZOOKEEPER_HOME']
else:
    zkDir = os.path.join(os.environ['PIG_HOME'], "share", "zookeeper")

# The jars are optional: a missing directory simply adds nothing.
if os.path.exists(zkDir):
    zkJars = glob.glob(os.path.join(zkDir, "zookeeper-*.jar"))
    for jar in zkJars:
        classpath += os.pathsep + jar

######### Done adding zookeeper jars
| |
| |
######### Locate and add hbase jars if they exist

# HBASE_HOME wins; otherwise look under ${PIG_HOME}/share/hbase.
hbaseDir = os.environ.get('HBASE_HOME')
if hbaseDir is None:
    hbaseDir = os.path.join(os.environ['PIG_HOME'], "share", "hbase")

# The jars are optional: a missing directory simply adds nothing.
if os.path.exists(hbaseDir):
    hbaseJars = glob.glob(os.path.join(hbaseDir, "hbase-*.jar"))
    classpath = os.pathsep.join([classpath] + hbaseJars)

######### Done adding hbase jars
| |
| |
######### set the log directory and logfile if they don't exist

# Honour PIG_LOG_DIR / PIG_LOGFILE when they are set to a non-empty value;
# otherwise use ${PIG_HOME}/logs and pig.log.
pigLogDir = os.environ.get('PIG_LOG_DIR') or os.path.join(os.environ['PIG_HOME'], "logs")
pigLogFile = os.environ.get('PIG_LOGFILE') or 'pig.log'

######### Done setting the logging directory and logfile

# Start from the user's extra JVM options (if any) and append the system
# properties that carry the logging and install locations.
pigOpts = os.environ.get('PIG_OPTS', '')

pigOpts += " -Dpig.log.dir=" + pigLogDir
pigOpts += " -Dpig.log.file=" + pigLogFile
pigOpts += " -Dpig.home.dir=" + os.environ['PIG_HOME']
| |
pigJar = ""
hadoopBin = ""
# Bound up front so the fallback logic below never reads an unset name.
hadoopHomePath = None
hadoopPrefixPath = None

print("HADOOP_HOME: %s" % os.path.expandvars(os.environ.get('HADOOP_HOME', '')))

# Look for bin/hadoop under HADOOP_PREFIX first, then under HADOOP_HOME; a hit
# under HADOOP_HOME replaces one found under HADOOP_PREFIX.
if os.environ.get('HADOOP_PREFIX') is not None:
    print("Found a hadoop prefix")
    hadoopPrefixPath = os.path.expandvars(os.environ['HADOOP_PREFIX'])
    if os.path.exists(os.path.join(hadoopPrefixPath, "bin", "hadoop")):
        hadoopBin = os.path.join(hadoopPrefixPath, "bin", "hadoop")

if os.environ.get('HADOOP_HOME') is not None:
    print("Found a hadoop home")
    hadoopHomePath = os.path.expandvars(os.environ['HADOOP_HOME'])
    print("Hadoop home path: %s" % hadoopHomePath)
    if os.path.exists(os.path.join(hadoopHomePath, "bin", "hadoop")):
        hadoopBin = os.path.join(hadoopHomePath, "bin", "hadoop")

# Last resort for the binary: /usr/bin/hadoop.
if hadoopBin == "":
    usrBinHadoop = os.path.join(os.path.sep + "usr", "bin", "hadoop")
    if os.path.exists(usrBinHadoop):
        hadoopBin = usrBinHadoop

# find out the HADOOP_HOME in order to find hadoop jar
# we use the name of hadoop jar to decide if user is using
# hadoop 1 or hadoop 2
if hadoopHomePath is None and hadoopPrefixPath is not None:
    hadoopHomePath = hadoopPrefixPath

if hadoopHomePath is None and hadoopBin != "":
    # <home>/bin/hadoop -> <home>
    hadoopHomePath = os.path.join(os.path.dirname(hadoopBin), os.path.pardir)

hadoopCoreJars = []
if hadoopHomePath is not None:
    hadoopCoreJars = glob.glob(os.path.join(hadoopHomePath, "hadoop-core*.jar"))

# A hadoop-core jar directly under the Hadoop home is taken as the sign of a
# Hadoop 1 installation, which is rejected.
if len(hadoopCoreJars) == 0:
    hadoopVersion = 2
else:
    sys.exit("Cannot locate Hadoop 2 binaries, please install Hadoop 2.x and try again.")
| |
if hadoopBin != "":
    # A local hadoop launcher exists: run Pig through `hadoop jar`.
    if debug:
        print("Find hadoop at %s" % hadoopBin)

    pigHome = os.environ['PIG_HOME']
    coreJarName = "pig-core-h" + str(hadoopVersion) + ".jar"
    versionedJarGlob = "pig-*-core-h" + str(hadoopVersion) + ".jar"

    # Jar lookup order: exact name in PIG_HOME, versioned name in PIG_HOME,
    # versioned name in PIG_HOME/share/pig.
    if os.path.exists(os.path.join(pigHome, coreJarName)):
        pigJar = os.path.join(pigHome, coreJarName)

    else:
        pigJars = glob.glob(os.path.join(pigHome, versionedJarGlob))
        if len(pigJars) == 1:
            pigJar = pigJars[0]

        elif len(pigJars) > 1:
            print("Ambiguity with pig jars found the following jars")
            print(pigJars)
            sys.exit("Please remove irrelavant jars from %s" % os.path.join(pigHome, versionedJarGlob))
        else:
            pigJars = glob.glob(os.path.join(pigHome, "share", "pig", versionedJarGlob))
            if len(pigJars) == 1:
                pigJar = pigJars[0]
            else:
                sys.exit("Cannot locate pig-core-h2.jar do 'ant jar', and try again")

    pigLibJars = glob.glob(os.path.join(pigHome, "lib", "h" + str(hadoopVersion), "*.jar"))
    for jar in pigLibJars:
        classpath += os.pathsep + jar

    # The hadoop launcher picks up extra classpath entries from HADOOP_CLASSPATH.
    if 'HADOOP_CLASSPATH' in os.environ:
        os.environ['HADOOP_CLASSPATH'] += os.pathsep + classpath
    else:
        os.environ['HADOOP_CLASSPATH'] = classpath

    os.environ['HADOOP_CLASSPATH'] += os.pathsep + pigJar

    # NOTE(review): javaHeapMax and pigOpts are not passed on this path --
    # confirm whether they should be exported (e.g. through HADOOP_OPTS).
    if debug:
        print("dry run:")
        print("HADOOP_CLASSPATH: %s" % os.environ['HADOOP_CLASSPATH'])
        if 'HADOOP_OPTS' in os.environ:
            print("HADOOP_OPTS: %s" % os.environ['HADOOP_OPTS'])
        print("%s jar %s %s" % (hadoopBin, pigJar, ' '.join(restArgs)))

    else:
        # NOTE(review): the command is a shell string, so arguments holding
        # spaces or shell metacharacters are not protected.
        cmdLine = hadoopBin + ' jar ' + pigJar + ' ' + ' '.join(restArgs)
        # Hand the child's exit status back to whoever invoked the launcher.
        sys.exit(subprocess.call(cmdLine, shell=True))

else:
    # fall back to use fat pig.jar
    if debug:
        print("Cannot find local hadoop installation, using bundled hadoop 2")

    pigHome = os.environ['PIG_HOME']

    if os.path.exists(os.path.join(pigHome, "pig-core-h2.jar")):
        pigJar = os.path.join(pigHome, "pig-core-h2.jar")

    else:
        pigJars = glob.glob(os.path.join(pigHome, "pig-*-core-h2.jar"))

        if len(pigJars) == 1:
            pigJar = pigJars[0]

        elif len(pigJars) > 1:
            print("Ambiguity with pig jars found the following jars")
            print(pigJars)
            sys.exit("Please remove irrelavant jars from %s" % os.path.join(pigHome, "pig-core-h2.jar"))
        else:
            sys.exit("Cannot locate pig-core-h2.jar. do 'ant jar' and try again")

    # Hadoop 2 specific jars and the bundled Hadoop runtime shipped with Pig.
    for libSubDir in ("h2", "hadoop2-runtime"):
        pigLibJars = glob.glob(os.path.join(pigHome, "lib", libSubDir, "*.jar"))
        for jar in pigLibJars:
            classpath += os.pathsep + jar

    classpath += os.pathsep + pigJar
    pigClass = "org.apache.pig.Main"
    if debug:
        print("dry runXXX:")
        print("%s %s %s -classpath %s %s %s" % (java, javaHeapMax, pigOpts, classpath, pigClass, ' '.join(restArgs)))
    else:
        # NOTE(review): the command is a shell string, so arguments holding
        # spaces or shell metacharacters are not protected.
        cmdLine = java + ' ' + javaHeapMax + ' ' + pigOpts
        cmdLine += ' ' + '-classpath ' + classpath + ' ' + pigClass + ' ' + ' '.join(restArgs)
        # Hand the child's exit status back to whoever invoked the launcher.
        sys.exit(subprocess.call(cmdLine, shell=True))
| |