Model/lookalike-model/lookalike_model/run.sh - incubator-bluemarlin - Git at Google

 #!/bin/bash

 # main_clean step: builds the cleaned persona, click-log and show-log data.
 # The guard is hard-coded to `false`, so this step is skipped until it is
 # switched to `true` by hand.
 if false; then
   # Produces three tables: new persona, new clicklog, new showlog.
   spark-submit \
     --master yarn \
     --num-executors 20 \
     --executor-cores 5 \
     --executor-memory 8G \
     --driver-memory 8G \
     --conf spark.driver.maxResultSize=5g \
     --conf spark.hadoop.hive.exec.dynamic.partition=true \
     --conf spark.hadoop.hive.exec.dynamic.partition.mode=nonstrict \
     pipeline/main_clean.py config.yml
 fi

 # main_logs step: builds the union logs and cleans them.
 # The guard is hard-coded to `false`, so this step is skipped until it is
 # switched to `true` by hand.
 if false; then
   spark-submit \
     --master yarn \
     --num-executors 20 \
     --executor-cores 5 \
     --conf spark.hadoop.hive.exec.dynamic.partition=true \
     --conf spark.hadoop.hive.exec.dynamic.partition.mode=nonstrict \
     pipeline/main_logs.py config.yml
 fi

 # main_trainready step: produces the trainready data by grouping the data.
 # The guard is hard-coded to `false`, so this step is skipped until it is
 # switched to `true` by hand.
 if false; then
   spark-submit \
     --master yarn \
     --num-executors 20 \
     --executor-cores 5 \
     --conf spark.hadoop.hive.exec.dynamic.partition=true \
     --conf spark.hadoop.hive.exec.dynamic.partition.mode=nonstrict \
     pipeline/main_trainready.py config.yml
 fi

 # main_tfrecords step: saves the tables as <config.pipeline.tfrecords_path>.
 # The guard is hard-coded to `false`, so this step is skipped until it is
 # switched to `true` by hand.
 if false; then
   # Generates the tf records (din tf record) in hdfs.
   # Once the tf records folder exists in hdfs, copy it to the local
   # filesystem with 'hadoop fs -copyToLocal'.
   spark-submit \
     --jars spark-tensorflow-connector_2.11-1.15.0.jar \
     pipeline/main_tfrecords.py config.yml
 fi
	#!/bin/bash

	# main_clean step: builds the cleaned persona, click-log and show-log data.
	# The guard is hard-coded to `false`, so this step is skipped until it is
	# switched to `true` by hand.
	if false; then
		# Produces three tables: new persona, new clicklog, new showlog.
		spark-submit \
			--master yarn \
			--num-executors 20 \
			--executor-cores 5 \
			--executor-memory 8G \
			--driver-memory 8G \
			--conf spark.driver.maxResultSize=5g \
			--conf spark.hadoop.hive.exec.dynamic.partition=true \
			--conf spark.hadoop.hive.exec.dynamic.partition.mode=nonstrict \
			pipeline/main_clean.py config.yml
	fi

	# main_logs step: builds the union logs and cleans them.
	# The guard is hard-coded to `false`, so this step is skipped until it is
	# switched to `true` by hand.
	if false; then
		spark-submit \
			--master yarn \
			--num-executors 20 \
			--executor-cores 5 \
			--conf spark.hadoop.hive.exec.dynamic.partition=true \
			--conf spark.hadoop.hive.exec.dynamic.partition.mode=nonstrict \
			pipeline/main_logs.py config.yml
	fi

	# main_trainready step: produces the trainready data by grouping the data.
	# The guard is hard-coded to `false`, so this step is skipped until it is
	# switched to `true` by hand.
	if false; then
		spark-submit \
			--master yarn \
			--num-executors 20 \
			--executor-cores 5 \
			--conf spark.hadoop.hive.exec.dynamic.partition=true \
			--conf spark.hadoop.hive.exec.dynamic.partition.mode=nonstrict \
			pipeline/main_trainready.py config.yml
	fi

	# main_tfrecords step: saves the tables as <config.pipeline.tfrecords_path>.
	# The guard is hard-coded to `false`, so this step is skipped until it is
	# switched to `true` by hand.
	if false; then
		# Generates the tf records (din tf record) in hdfs.
		# Once the tf records folder exists in hdfs, copy it to the local
		# filesystem with 'hadoop fs -copyToLocal'.
		spark-submit \
			--jars spark-tensorflow-connector_2.11-1.15.0.jar \
			pipeline/main_tfrecords.py config.yml
	fi