blob: 763130de26e062da5a85d6018950406eaf76cf66 [file] [log] [blame]
#!/usr/bin/env bash
#
#/**
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
# * regarding copyright ownership. The ASF licenses this file
# * to you under the Apache License, Version 2.0 (the
# * "License"); you may not use this file except in compliance
# * with the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
#
# run a singa job
#
# Help text, written to stderr when the command line is malformed.
usage="Usage: singa-run.sh [ arguments ]
  -exec <binary or python script> : if want to use own singa driver
  -conf <job config file> : need cluster conf if train in a cluster
  -resume : if want to recover a job"

#######################################
# Parse the command line.
# '-exec <prog>' and '-conf <file>' are extracted and removed; every other
# argument is appended untouched, space-separated, to 'args'.
# Globals:   exe (written), conf (written), args (written), usage (read)
# Arguments: the positional parameters of the script
# Returns:   0 on success; exits 1 when -exec/-conf is given without a value
#######################################
parse_args() {
  exe=./singa
  # Start from empty values so same-named variables inherited from the
  # caller's environment cannot leak into the job command line.
  conf=
  args=
  while [ $# -gt 0 ]; do
    # "$1" is quoted: an empty or multi-word argument must not break the test.
    case "$1" in
      -exec | -conf)
        if [ $# -lt 2 ]; then
          printf '%s\n' "singa-run.sh: option $1 requires a value" "$usage" >&2
          exit 1
        fi
        if [ "$1" = "-exec" ]; then
          exe=$2
        else
          conf=$2
        fi
        # Drop the option name here; the shift below drops its value.
        shift
        ;;
      *)
        args="$args $1"
        ;;
    esac
    shift
  done
}
parse_args "$@"
# get environment variables
# singa-env.sh is looked up next to this script. SINGA_HOME and SINGA_LOG,
# which the statements below rely on, are presumably defined there.
# The path is quoted so an install directory containing spaces still works.
. "$(dirname "${BASH_SOURCE-$0}")/singa-env.sh"
# change conf to an absolute path

#######################################
# Print the absolute path of a job configuration file.
# Arguments: $1 - path to the conf file, relative or absolute
# Outputs:   the absolute path on stdout; a diagnostic on stderr on failure
# Returns:   0 if the path names an existing regular file, 1 otherwise
#######################################
abs_conf_path() {
  local dir path
  # The cd runs inside the command substitution, so the caller's working
  # directory is untouched. CDPATH is cleared so a relative directory is
  # resolved against the current directory only, and '&&' keeps a failed cd
  # from reporting the current directory as the result.
  dir=$(CDPATH= cd -- "$(dirname -- "$1")" 2>/dev/null && pwd) || {
    printf '%s\n' "$1 not exists" >&2
    return 1
  }
  path=$dir/$(basename -- "$1")
  if [ ! -f "$path" ]; then
    printf '%s\n' "$path not exists" >&2
    return 1
  fi
  printf '%s\n' "$path"
}

# Stays empty when no -conf was given, regardless of the environment.
job_conf=
if [ -n "$conf" ]; then
  job_conf=$(abs_conf_path "$conf") || exit 1
fi
# go to singa home to execute binary
# ':?' aborts with a message when SINGA_HOME is unset or empty; an unquoted
# empty value would turn this into a bare 'cd', which moves to $HOME.
cd "${SINGA_HOME:?SINGA_HOME is not set}" || exit 1
# generate unique job id
# The id is whatever 'singatool create' prints on stdout; a non-zero exit
# status from the tool aborts the run.
job_id=$(./singatool create) || exit 1
echo "Unique JOB_ID is $job_id"
# generate job info dir
# format: job-JOB_ID-YYYYMMDD-HHMMSS
# ':?' stops the script when SINGA_LOG is unset or empty; otherwise the
# directory would be created directly under the filesystem root.
log_dir="${SINGA_LOG:?SINGA_LOG is not set}/job-info/job-$job_id-$(date '+%Y%m%d-%H%M%S')"
# Later steps write into this directory, so fail here if it cannot be made.
mkdir -p -- "$log_dir" || exit 1
echo "Record job information to $log_dir"
# generate host file
host_file=$log_dir/job.hosts
# ${job_conf:+"$job_conf"} passes the conf path as a single word when it is
# set, and passes no argument at all (rather than an empty one) when no
# -conf was given. The output of 'singatool genhost' becomes the host file.
./singatool genhost ${job_conf:+"$job_conf"} 1>"$host_file" || exit 1
# set command to run singa
# The command is kept as one string because it is word-split when executed
# locally and re-parsed by the remote shell when sent over ssh.
singa_run="$exe $args -singa_conf $SINGA_HOME/conf/singa.conf -singa_job $job_id"
# add -conf if exists
if [ -n "$job_conf" ]; then
  singa_run="$singa_run -conf $job_conf"
fi
# Remote variant: change into SINGA_HOME and source conf/profile first, then
# run the same command.
singa_sshrun="cd $SINGA_HOME; source $SINGA_HOME/conf/profile; $singa_run"
# ssh and start singa processes
# Options are held in an array so each one reaches ssh as exactly one word.
ssh_options=(
  -oStrictHostKeyChecking=no
  -oUserKnownHostsFile=/dev/null
  -oLogLevel=quiet
)
# The host is the first space-separated field of each line of the host file.
hosts=$(cut -d ' ' -f 1 -- "$host_file")
# $hosts is split on whitespace on purpose: one word per host, and blank
# lines in the host file contribute nothing.
for host in $hosts; do
  if [ "$host" = localhost ]; then
    echo Executing : $singa_run
    # Intentionally unquoted: the command string is split into words here.
    $singa_run &
  else
    echo Executing @ $host : $singa_sshrun
    # ssh joins its trailing arguments with spaces into the remote command,
    # so the remote side runs the prepared command followed by
    # '-host <host>'.
    ssh "${ssh_options[@]}" "$host" $singa_sshrun " -host " "$host" &
  fi
done
# generate pid list for this job
# NOTE(review): the pause presumably gives the launched processes time to
# come up before singatool is asked about them - confirm.
sleep 2
# The output of 'singatool view' for this job is stored as job.pids in the
# job info directory; the redirect target is quoted so a log path containing
# spaces is not rejected as an ambiguous redirect.
./singatool view "$job_id" 1>"$log_dir/job.pids" || exit 1
# Block until every background process started by this script has exited.
wait