blob: 33c39f5f4bd17f53878eea349192370ac9e22291 [file] [log] [blame]
#!/bin/bash
# setup
# Put the CUDA runtime libraries first on the loader search path. The
# ${VAR:+...} form avoids leaving a trailing ':' when LD_LIBRARY_PATH was
# unset or empty; an empty entry is read by the loader as the current directory.
export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# Run from the directory that holds this script so the relative paths used
# below resolve. dirname "$0" is correct for both relative and absolute
# invocations; prefixing it with the current directory is not.
cd "$(dirname "$0")" || exit 1
. sh2ju.sh
## clean last build log
juLogClean
# Optional single argument: number of GPUs to use (default: 4).
if [ $# -eq 1 ]; then
  num_gpus=$1
else
  num_gpus=4
fi
# Comma-separated device ids 0..num_gpus-1, e.g. "0,1,2,3" for num_gpus=4.
# The explicit '-' operand makes paste read stdin on implementations that
# require a file argument.
gpus=$(seq 0 $((num_gpus - 1)) | paste -sd "," -)
# build
# Rebuild the tree two levels up: `make clean` followed by a parallel (-j8)
# make. The clean step's status is not checked; the function's exit status is
# that of the final make invocation.
build() {
  local top=../..
  make -C "$top" clean
  make -C "$top" -j8
}
# Start from the stock build configuration, then append the settings this
# suite builds with (CUDA, cuDNN and the distributed kvstore set to 1).
cp ../../make/config.mk ../..
{
  printf '%s\n' \
    "USE_CUDA=1" \
    "USE_CUDA_PATH=/usr/local/cuda" \
    "USE_CUDNN=1" \
    "USE_DIST_KVSTORE=1"
} >>../../config.mk
juLog -name=Build -error=Error build

# python: local kvstore
juLog -name=Python.Local.KVStore -error=Error python test_kvstore.py

# python: distributed kvstore
juLog -name=Python.Distributed.KVStore -error=Error ../../tools/launch.py -n 4 python dist_sync_kvstore.py

# download data
juLog -name=DownloadData bash ./download.sh
# check if the final evaluation accuracy reaches the threshold
#
# Feeds ./log through tools/parse_log.py, keeps the last line of its output
# and compares the third whitespace-separated field with the threshold.
# Arguments: $1 - minimum acceptable value
# Outputs:   exactly one line on stdout, either
#              "Final validation >= $1, Pass"  or
#              "Final validation < $1, Fail"
#            Callers in this file run the tests under `juLog -error=Fail`, so
#            a verdict must always be printed, including when the log yields
#            no parsable result.
# Side effects: removes ./log
check_val() {
  local expected=$1
  local pass="Final validation >= $expected, Pass"
  local fail="Final validation < $expected, Fail"
  # Values are handed to awk with -v instead of being spliced into the
  # program text.
  python ../../tools/parse_log.py log --format none | tail -n1 |
    awk -v expected="$expected" -v pass="$pass" -v fail="$fail" '
      # Accept the field only if it is a plain decimal number.
      $3 ~ /^[.0-9]+$/ { acc = $3; ok = 1 }
      # Decide in END so that empty input still produces a Fail verdict.
      END {
        if (ok && acc + 0 >= expected + 0) print pass; else print fail
      }'
  rm -f log
}
# Relative path to the image-classification example scripts used by the
# training tests below.
example_dir="../../example/image-classification"
# python: lenet + mnist
#
# Runs $example_dir/train_mnist.py with --network lenet on the devices listed
# in $gpus for 10 epochs, copies the combined stdout/stderr to ./log via tee,
# then evaluates the log with check_val 0.99.
# Globals: example_dir, gpus (read)
test_lenet() {
  # Expansions are quoted so a workspace path containing whitespace reaches
  # python as a single argument.
  python "$example_dir/train_mnist.py" \
    --data-dir "$(pwd)/data/mnist/" --network lenet --gpus "$gpus" --num-epochs 10 \
    2>&1 | tee log
  check_val 0.99
}
# Run test_lenet under juLog. juLog is not defined in this file; presumably it
# comes from the sh2ju.sh sourced during setup — confirm.
# NOTE(review): -error=Fail looks like the output pattern juLog treats as a
# failure, matching the "Fail" verdict printed by check_val — verify against
# sh2ju.sh.
juLog -name=Python.Lenet.Mnist -error=Fail test_lenet
# python: distributed lenet + mnist
#
# Starts ./dist_lenet.py through ../../tools/launch.py with -n $num_gpus,
# using --kv-store dist_sync for 10 epochs, copies the combined stdout/stderr
# to ./log via tee, then evaluates the log with check_val 0.98.
# Globals: num_gpus (read)
test_dist_lenet() {
  # Expansions are quoted so a workspace path containing whitespace is passed
  # on as a single argument.
  ../../tools/launch.py -n "${num_gpus}" \
    python ./dist_lenet.py --data-dir "$(pwd)/data/mnist/" \
    --kv-store dist_sync \
    --num-epochs 10 \
    2>&1 | tee log
  check_val 0.98
}
# Run test_dist_lenet under juLog. juLog is not defined in this file;
# presumably it comes from the sh2ju.sh sourced during setup — confirm.
# NOTE(review): -error=Fail looks like the output pattern juLog treats as a
# failure, matching the "Fail" verdict printed by check_val — verify against
# sh2ju.sh.
juLog -name=Python.Distributed.Lenet.Mnist -error=Fail test_dist_lenet
# python: inception + cifar10
#
# Runs $example_dir/train_cifar10.py on the devices listed in $gpus for 20
# epochs with batch size 256, copies the combined stdout/stderr to ./log via
# tee, then evaluates the log with check_val 0.82.
# Globals: example_dir, gpus (read)
test_inception_cifar10() {
  # Expansions are quoted so a workspace path containing whitespace reaches
  # python as a single argument.
  python "$example_dir/train_cifar10.py" \
    --data-dir "$(pwd)/data/cifar10/" --gpus "$gpus" --num-epochs 20 --batch-size 256 \
    2>&1 | tee log
  check_val 0.82
}
juLog -name=Python.Inception.Cifar10 -error=Fail test_inception_cifar10

# build without CUDNN
# The setting is appended after the USE_CUDNN=1 line written earlier;
# presumably the later assignment wins when the makefile is read — confirm.
echo "USE_CUDNN=0" >>../../config.mk
juLog -name=BuildWithoutCUDNN -error=Error build

# python: multi gpus lenet + mnist
juLog -name=Python.Multi.Lenet.Mnist -error=Error python multi_lenet.py

# NOTE(review): $errors is never assigned in this file; presumably it is
# maintained by sh2ju.sh as the failure count — confirm.
exit $errors