blob: 17ad97b085ea79f9cefb7c8e6b0eedab68e52d98 [file] [log] [blame]
#!/usr/bin/env bash
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# Guard: every path used below (./datagen, logs, results, ...) is relative, so
# the script refuses to run unless the current directory is named 'perftest'.
# "$PWD" is quoted so that a path containing whitespace is not word-split
# before it reaches basename.
if [ "$(basename "$PWD")" != "perftest" ]; then
  echo "Please execute scripts from directory 'perftest'"
  exit 1
fi
# Launcher command handed to the data generators and benchmark runners.
CMD='./sparkDML2.sh'
# Folder name passed to the sub-scripts as their second argument.
TEMPFOLDER='temp'
# Upper bound on the memory size of the data to be benchmarked.
# Possible values: 80/80MB, 800/800MB, 8000/8000MB/8GB, 80000/80000MB/80GB, 800000/800000MB/800GB
MAXMEM='80000'
# Log4j properties file; exported so that child processes inherit it.
LOG4JPROP="conf/log4j-off.properties"
export LOG4JPROP
# Create the output directories if they do not exist yet.
# The temp directory follows TEMPFOLDER (falling back to "temp" when it is
# unset or empty) so it stays in sync with the folder name that is handed to
# the sub-scripts instead of being hard-coded a second time.
mkdir -p -- logs results "${TEMPFOLDER:-temp}"
# Start a fresh timing log: remove any previous file, then append a header
# consisting of the current timestamp, the host name and trailing blank lines.
rm -f results/times.txt
{
  date +"%Y-%m-%d-%T"
  echo -e "\n$HOSTNAME"
  echo -e "\n\n"
} >> results/times.txt
## Data Gen
# Each generator receives the launcher command, the temp folder and the
# data-size limit; its stdout and stderr are captured in a file under logs/.
# The arguments are quoted so that each value reaches the generator as exactly
# one word, even if it is later changed to contain whitespace or glob characters.
./datagen/genBinomialData.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}" &> logs/genBinomialData.out
./datagen/genMultinomialData.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}" &> logs/genMultinomialData.out
# Currently disabled generators (uncomment to enable):
#./datagen/genDescriptiveStatisticsData.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}" &> logs/genStatsData.out
#./datagen/genStratStatisticsData.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}" &> logs/genStratStatsData.out
#./datagen/genClusteringData.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}" &> logs/genClusteringData.out
#./datagen/genDimensionReductionData.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}" &> logs/genDimensionReductionData.out
#./datagen/genALSData.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}" &> logs/genALSData.out
# Benchmark invocations. Only the binomial, multinomial and regression suites
# are active; the remaining lines are kept as commented-out toggles.
# All arguments are quoted so that each value is passed to the runner as
# exactly one word, even if it contains whitespace or glob characters.
### Micro Benchmarks:
#./MatrixMult.sh "${CMD}"
#./MatrixTranspose.sh "${CMD}"
# Federated benchmark
#./fed/runAllFed.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}"
### Algorithms Benchmarks:
./runAllBinomial.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}"
./runAllMultinomial.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}"
./runAllRegression.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}"
#./runAllStats.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}"
#./runAllClustering.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}"
#./runAllDimensionReduction.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}"
#./runAllALS.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}"
#./KnnMissingValueImputation.sh "${CMD}" "${MAXMEM}"
### IO Benchmarks:
#./runAllIO.sh "${CMD}" "${TEMPFOLDER}" "${MAXMEM}"
# TODO The following benchmarks have yet to be written. The decision tree algorithms additionally need to be fixed.
# add stepwise Linear
# add stepwise GLM
#./runAllTrees.sh $CMD $TEMPFOLDER
# add randomForest
#./runAllMatrixFactorization.sh $CMD $TEMPFOLDER
#./runAllSurvival.sh $CMD $TEMPFOLDER
#KaplanMeier
#Cox
# Keep a copy of the timing log under a name tagged with the host name and the
# current timestamp, so that it is not lost when the next run removes
# results/times.txt.
times_snapshot="results/times-${HOSTNAME}-$(date +"%Y-%m-%d-%T").txt"
cp results/times.txt "${times_snapshot}"