#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
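#
# Launcher for Scrunch jobs: given a Scala or Java source file defining an
# object that extends PipelineApp, this script compiles it against the
# Scrunch distribution jars, packages it into a jar, and submits it to
# Hadoop. The usage implied by the argument handling below is:
#   <this script> <JobFile.(scala|java) | fully.qualified.JobName> [job args...]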
import glob
import os
import re
import shutil
import subprocess
import sys
# Configuration in script
##############################################################
if not "SCALA_HOME" in os.environ:
sys.stderr.write("Environment variable SCALA_HOME must be set\n")
sys.exit(1)
SCALA_LIB = os.path.join(os.environ["SCALA_HOME"], "lib")
if not "HADOOP_HOME" in os.environ:
sys.stderr.write("Environment variable HADOOP_HOME must be set\n")
sys.exit(1)
HADOOP_HOME = os.environ["HADOOP_HOME"]
HADOOP_JARS = ":".join(glob.glob(os.path.join(HADOOP_HOME, "*.jar")))
# Get the absolute path of the original (non-symlink) file.
if os.path.islink(__file__):
    ORIGINAL_FILE = os.readlink(__file__)
else:
    ORIGINAL_FILE = __file__
DIST_ROOT = os.path.abspath(os.path.join(os.path.dirname(ORIGINAL_FILE), ".."))
LIB_DIR = os.path.join(DIST_ROOT, "lib")  # Dir with all Scrunch dependencies.
TMPDIR = "/tmp"
BUILDDIR = TMPDIR + "/script-build"
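# Invoke the Scala compiler's main class directly from java, with the Scala
# library and compiler jars on the classpath.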
COMPILE_CMD = "java -cp %s/scala-library.jar:%s/scala-compiler.jar -Dscala.home=%s scala.tools.nsc.Main" % (SCALA_LIB, SCALA_LIB, SCALA_LIB)
##############################################################
argv = sys.argv[1:]
if len(argv) < 1:
    sys.stderr.write("ERROR: insufficient args.\n")
    sys.exit(1)
JOBFILE = argv.pop(0)
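# Return the source extension ("scala" or "java") of JOBFILE, or None when
# the argument is not a source file (e.g. a fully-qualified class name).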
def file_type():
    m = re.search(r'\.(scala|java)$', JOBFILE)
    if m:
        return m.group(1)
    return None
def is_file():
    return file_type() is not None
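# Regexes for the package declaration, an object extending PipelineApp,
# and the source-file extension.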
PACK_RE = r'package ([^;]+)'
OBJECT_RE = r'object\s+([^\s(]+).*(extends|with)\s+PipelineApp.*'
EXTENSION_RE = r'(.*)\.(scala|java)$'
# Get the name of the job from the file.
# The rule is: the last PipelineApp object in the file, or the one whose
# name matches the filename.
def get_job_name(file):
    package = ""
    job = None
    default = None
    match = re.search(EXTENSION_RE, file)
    if match:
        default = match.group(1)
        for s in open(file, "r"):
            mp = re.search(PACK_RE, s)
            mo = re.search(OBJECT_RE, s)
            if mp:
                package = mp.group(1).strip() + "."
            elif mo:
                if not job or not default or job.lower() != default.lower():
                    # use either the last object, or the one with the same name as the file
                    job = mo.group(1)
        if not job:
            raise Exception("Could not find job name")
        return "%s%s" % (package, job)
    else:
        return file
LIB_PATH = os.path.abspath(LIB_DIR)
if not os.path.exists(LIB_PATH):
    sys.stderr.write("Scrunch distribution lib directory not found; run mvn package to construct a distribution to run examples from.\n")
    sys.exit(1)
LIB_JARS = glob.glob(os.path.join(LIB_PATH, "*.jar"))
LIB_CP = ":".join(LIB_JARS)
JOBPATH = os.path.abspath(JOBFILE)
JOB = get_job_name(JOBFILE)
JOBJAR = JOB + ".jar"
JOBJARPATH = os.path.join(TMPDIR, JOBJAR)
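# The job jar needs rebuilding if it does not exist or is older than the
# job source file.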
def needs_rebuild():
    return not os.path.exists(JOBJARPATH) or os.stat(JOBJARPATH).st_mtime < os.stat(JOBPATH).st_mtime
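# Compile the job source into BUILDDIR and package the resulting classes
# into a jar at JOBJARPATH; the temporary build directory is removed when done.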
def build_job_jar():
    sys.stderr.write("compiling " + JOBFILE + "\n")
    if os.path.exists(BUILDDIR):
        shutil.rmtree(BUILDDIR)
    os.makedirs(BUILDDIR)
    cmd = "%s -classpath %s:%s -d %s %s" % (COMPILE_CMD, LIB_CP, HADOOP_JARS, BUILDDIR, JOBFILE)
    print(cmd)
    if subprocess.call(cmd, shell=True):
        shutil.rmtree(BUILDDIR)
        sys.exit(1)
    jar_cmd = "jar cf %s -C %s ." % (JOBJARPATH, BUILDDIR)
    subprocess.call(jar_cmd, shell=True)
    shutil.rmtree(BUILDDIR)
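# Build the shell command that submits the job jar through bin/hadoop;
# HADOOP_CLASSPATH is passed as an environment prefix so the hadoop
# process actually sees it.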
def hadoop_command():
    return "HADOOP_CLASSPATH=%s %s/bin/hadoop jar %s %s %s" % (LIB_CP, HADOOP_HOME, JOBJARPATH, JOB, " ".join(argv))
if is_file() and needs_rebuild():
    build_job_jar()
SHELL_COMMAND = hadoop_command()
print(SHELL_COMMAND)
os.system(SHELL_COMMAND)