.test-infra/jenkins/job_PerformanceTests_FileBasedIO_IT.groovy - beam - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import CommonJobProperties as common
 import InfluxDBCredentialsHelper

 def jobs = [
   [
     name               : 'beam_PerformanceTests_TextIOIT',
     description        : 'Runs performance tests for TextIOIT',
     test               : 'org.apache.beam.sdk.io.text.TextIOIT',
     githubTitle        : 'Java TextIO Performance Test',
     githubTriggerPhrase: 'Run Java TextIO Performance Test',
     pipelineOptions    : [
       bigQueryDataset     : 'beam_performance',
       bigQueryTable       : 'textioit_results',
       influxMeasurement   : 'textioit_results',
       numberOfRecords     : '25000000',
       expectedHash        : 'f8453256ccf861e8a312c125dfe0e436',
       datasetSize         : '1062290000',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]
   ],
   [
     name               : 'beam_PerformanceTests_Compressed_TextIOIT',
     description        : 'Runs performance tests for TextIOIT with GZIP compression',
     test               : 'org.apache.beam.sdk.io.text.TextIOIT',
     githubTitle        : 'Java CompressedTextIO Performance Test',
     githubTriggerPhrase: 'Run Java CompressedTextIO Performance Test',
     pipelineOptions    : [
       bigQueryDataset     : 'beam_performance',
       bigQueryTable       : 'compressed_textioit_results',
       influxMeasurement   : 'compressed_textioit_results',
       numberOfRecords     : '450000000',
       expectedHash        : '8a3de973354abc6fba621c6797cc0f06',
       datasetSize         : '1097840000',
       compressionType     : 'GZIP',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]
   ],
   [
     name               : 'beam_PerformanceTests_ManyFiles_TextIOIT',
     description        : 'Runs performance tests for TextIOIT with many output files',
     test               : 'org.apache.beam.sdk.io.text.TextIOIT',
     githubTitle        : 'Java ManyFilesTextIO Performance Test',
     githubTriggerPhrase: 'Run Java ManyFilesTextIO Performance Test',
     pipelineOptions    : [
       bigQueryDataset            : 'beam_performance',
       bigQueryTable              : 'many_files_textioit_results',
       influxMeasurement          : 'many_files_textioit_results',
       reportGcsPerformanceMetrics: 'true',
       gcsPerformanceMetrics      : 'true',
       numberOfRecords            : '25000000',
       expectedHash               : 'f8453256ccf861e8a312c125dfe0e436',
       datasetSize                : '1062290000',
       numberOfShards             : '1000',
       numWorkers                 : '5',
       autoscalingAlgorithm       : 'NONE'
     ]

   ],
   [
     name               : 'beam_PerformanceTests_AvroIOIT',
     description        : 'Runs performance tests for AvroIOIT',
     test               : 'org.apache.beam.sdk.io.avro.AvroIOIT',
     githubTitle        : 'Java AvroIO Performance Test',
     githubTriggerPhrase: 'Run Java AvroIO Performance Test',
     pipelineOptions    : [
       numberOfRecords     : '225000000',
       expectedHash        : '2f9f5ca33ea464b25109c0297eb6aecb',
       datasetSize         : '1089730000',
       bigQueryDataset     : 'beam_performance',
       bigQueryTable       : 'avroioit_results',
       influxMeasurement   : 'avroioit_results',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]
   ],
   [
     name               : 'beam_PerformanceTests_TFRecordIOIT',
     description        : 'Runs performance tests for beam_PerformanceTests_TFRecordIOIT',
     test               : 'org.apache.beam.sdk.io.tfrecord.TFRecordIOIT',
     githubTitle        : 'Java TFRecordIO Performance Test',
     githubTriggerPhrase: 'Run Java TFRecordIO Performance Test',
     pipelineOptions    : [
       bigQueryDataset     : 'beam_performance',
       bigQueryTable       : 'tfrecordioit_results',
       influxMeasurement   : 'tfrecordioit_results',
       numberOfRecords     : '18000000',
       expectedHash        : '543104423f8b6eb097acb9f111c19fe4',
       datasetSize         : '1019380000',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]
   ],
   [
     name               : 'beam_PerformanceTests_XmlIOIT',
     description        : 'Runs performance tests for beam_PerformanceTests_XmlIOIT',
     test               : 'org.apache.beam.sdk.io.xml.XmlIOIT',
     githubTitle        : 'Java XmlIOPerformance Test',
     githubTriggerPhrase: 'Run Java XmlIO Performance Test',
     pipelineOptions    : [
       bigQueryDataset     : 'beam_performance',
       bigQueryTable       : 'xmlioit_results',
       influxMeasurement   : 'xmlioit_results',
       numberOfRecords     : '12000000',
       expectedHash        : 'b3b717e7df8f4878301b20f314512fb3',
       datasetSize         : '1076590000',
       charset             : 'UTF-8',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]
   ],
   [
     name               : 'beam_PerformanceTests_ParquetIOIT',
     description        : 'Runs performance tests for beam_PerformanceTests_ParquetIOIT',
     test               : 'org.apache.beam.sdk.io.parquet.ParquetIOIT',
     githubTitle        : 'Java ParquetIOPerformance Test',
     githubTriggerPhrase: 'Run Java ParquetIO Performance Test',
     pipelineOptions    : [
       bigQueryDataset     : 'beam_performance',
       bigQueryTable       : 'parquetioit_results',
       influxMeasurement   : 'parquetioit_results',
       numberOfRecords     : '225000000',
       expectedHash        : '2f9f5ca33ea464b25109c0297eb6aecb',
       datasetSize         : '1087370000',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]
   ],
   [
     name               : 'beam_PerformanceTests_TextIOIT_HDFS',
     description        : 'Runs performance tests for TextIOIT on HDFS',
     test               : 'org.apache.beam.sdk.io.text.TextIOIT',
     githubTitle        : 'Java TextIO Performance Test on HDFS',
     githubTriggerPhrase: 'Run Java TextIO Performance Test HDFS',
     pipelineOptions    : [
       bigQueryDataset     : 'beam_performance',
       bigQueryTable       : 'textioit_hdfs_results',
       influxMeasurement   : 'textioit_hdfs_results',
       numberOfRecords     : '25000000',
       expectedHash        : 'f8453256ccf861e8a312c125dfe0e436',
       datasetSize         : '1062290000',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]

   ],
   [
     name               : 'beam_PerformanceTests_Compressed_TextIOIT_HDFS',
     description        : 'Runs performance tests for TextIOIT with GZIP compression on HDFS',
     test               : 'org.apache.beam.sdk.io.text.TextIOIT',
     githubTitle        : 'Java CompressedTextIO Performance Test on HDFS',
     githubTriggerPhrase: 'Run Java CompressedTextIO Performance Test HDFS',
     pipelineOptions    : [
       bigQueryDataset     : 'beam_performance',
       bigQueryTable       : 'compressed_textioit_hdfs_results',
       influxMeasurement   : 'compressed_textioit_hdfs_results',
       numberOfRecords     : '450000000',
       expectedHash        : '8a3de973354abc6fba621c6797cc0f06',
       datasetSize         : '1097840000',
       compressionType     : 'GZIP',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]
   ],
   [
     name               : 'beam_PerformanceTests_ManyFiles_TextIOIT_HDFS',
     description        : 'Runs performance tests for TextIOIT with many output files on HDFS',
     test               : 'org.apache.beam.sdk.io.text.TextIOIT',
     githubTitle        : 'Java ManyFilesTextIO Performance Test on HDFS',
     githubTriggerPhrase: 'Run Java ManyFilesTextIO Performance Test HDFS',
     pipelineOptions    : [
       bigQueryDataset            : 'beam_performance',
       bigQueryTable              : 'many_files_textioit_hdfs_results',
       influxMeasurement          : 'many_files_textioit_hdfs_results',
       reportGcsPerformanceMetrics: 'true',
       gcsPerformanceMetrics      : 'true',
       numberOfRecords            : '25000000',
       expectedHash               : 'f8453256ccf861e8a312c125dfe0e436',
       datasetSize                : '1062290000',
       numberOfShards             : '1000',
       numWorkers                 : '5',
       autoscalingAlgorithm       : 'NONE'
     ]

   ],
   [
     name               : 'beam_PerformanceTests_AvroIOIT_HDFS',
     description        : 'Runs performance tests for AvroIOIT on HDFS',
     test               : 'org.apache.beam.sdk.io.avro.AvroIOIT',
     githubTitle        : 'Java AvroIO Performance Test on HDFS',
     githubTriggerPhrase: 'Run Java AvroIO Performance Test HDFS',
     pipelineOptions    : [
       bigQueryDataset     : 'beam_performance',
       bigQueryTable       : 'avroioit_hdfs_results',
       influxMeasurement   : 'avroioit_hdfs_results',
       numberOfRecords     : '225000000',
       expectedHash        : '2f9f5ca33ea464b25109c0297eb6aecb',
       datasetSize         : '1089730000',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]
   ],
   [
     name               : 'beam_PerformanceTests_TFRecordIOIT_HDFS',
     description        : 'Runs performance tests for beam_PerformanceTests_TFRecordIOIT on HDFS',
     test               : 'org.apache.beam.sdk.io.tfrecord.TFRecordIOIT',
     githubTitle        : 'Java TFRecordIO Performance Test on HDFS',
     githubTriggerPhrase: 'Run Java TFRecordIO Performance Test HDFS',
     pipelineOptions    : [
       numberOfRecords     : '18000000',
       expectedHash        : '543104423f8b6eb097acb9f111c19fe4',
       datasetSize         : '1019380000',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]
   ],
   [
     name               : 'beam_PerformanceTests_XmlIOIT_HDFS',
     description        : 'Runs performance tests for beam_PerformanceTests_XmlIOIT on HDFS',
     test               : 'org.apache.beam.sdk.io.xml.XmlIOIT',
     githubTitle        : 'Java XmlIOPerformance Test on HDFS',
     githubTriggerPhrase: 'Run Java XmlIO Performance Test HDFS',
     pipelineOptions    : [
       bigQueryDataset     : 'beam_performance',
       bigQueryTable       : 'xmlioit_hdfs_results',
       influxMeasurement   : 'xmlioit_hdfs_results',
       numberOfRecords     : '12000000',
       expectedHash        : 'b3b717e7df8f4878301b20f314512fb3',
       datasetSize         : '1076590000',
       charset             : 'UTF-8',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]
   ],
   [
     name               : 'beam_PerformanceTests_ParquetIOIT_HDFS',
     description        : 'Runs performance tests for beam_PerformanceTests_ParquetIOIT on HDFS',
     test               : 'org.apache.beam.sdk.io.parquet.ParquetIOIT',
     githubTitle        : 'Java ParquetIOPerformance Test on HDFS',
     githubTriggerPhrase: 'Run Java ParquetIO Performance Test HDFS',
     pipelineOptions    : [
       bigQueryDataset     : 'beam_performance',
       bigQueryTable       : 'parquetioit_hdfs_results',
       influxMeasurement   : 'parquetioit_hdfs_results',
       numberOfRecords     : '225000000',
       expectedHash        : '2f9f5ca33ea464b25109c0297eb6aecb',
       datasetSize         : '1087370000',
       numWorkers          : '5',
       autoscalingAlgorithm: 'NONE'
     ]
   ]
 ]

 jobs.findAll {
   it.name in [
     'beam_PerformanceTests_TextIOIT',
     'beam_PerformanceTests_Compressed_TextIOIT',
     'beam_PerformanceTests_ManyFiles_TextIOIT',
     'beam_PerformanceTests_AvroIOIT',
     'beam_PerformanceTests_TFRecordIOIT',
     'beam_PerformanceTests_XmlIOIT',
     'beam_PerformanceTests_ParquetIOIT'
   ]
 }.forEach { testJob -> createGCSFileBasedIOITTestJob(testJob) }

 private void createGCSFileBasedIOITTestJob(testJob) {
   job(testJob.name) {
     description(testJob.description)
     common.setTopLevelMainJobProperties(delegate)
     common.enablePhraseTriggeringFromPullRequest(delegate, testJob.githubTitle, testJob.githubTriggerPhrase)
     common.setAutoJob(delegate, 'H */6 * * *')
     InfluxDBCredentialsHelper.useCredentials(delegate)
     additionalPipelineArgs = [
       influxDatabase: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
       influxHost: InfluxDBCredentialsHelper.InfluxDBHostUrl,
     ]
     testJob.pipelineOptions.putAll(additionalPipelineArgs)

     def dataflowSpecificOptions = [
       runner        : 'DataflowRunner',
       project       : 'apache-beam-testing',
       tempRoot      : 'gs://temp-storage-for-perf-tests',
       filenamePrefix: "gs://temp-storage-for-perf-tests/${testJob.name}/\${BUILD_ID}/",
     ]

     Map allPipelineOptions = dataflowSpecificOptions << testJob.pipelineOptions
     String runner = "dataflow"
     String filesystem = "gcs"
     String testTask = ":sdks:java:io:file-based-io-tests:integrationTest"

     steps {
       gradle {
         rootBuildScriptDir(common.checkoutDir)
         common.setGradleSwitches(delegate)
         switches("--info")
         switches("-DintegrationTestPipelineOptions=\'${common.joinPipelineOptions(allPipelineOptions)}\'")
         switches("-Dfilesystem=\'${filesystem}\'")
         switches("-DintegrationTestRunner=\'${runner}\'")
         tasks("${testTask} --tests ${testJob.test}")
       }
     }
   }
 }

 jobs.findAll {
   it.name in [
     'beam_PerformanceTests_TextIOIT_HDFS',
     'beam_PerformanceTests_Compressed_TextIOIT_HDFS',
     'beam_PerformanceTests_ManyFiles_TextIOIT_HDFS',
     // TODO(BEAM-3945) TFRecord performance test is failing only when running on hdfs.
     // We need to fix this before enabling this job on jenkins.
     //'beam_PerformanceTests_TFRecordIOIT_HDFS',
     'beam_PerformanceTests_AvroIOIT_HDFS',
     'beam_PerformanceTests_XmlIOIT_HDFS',
     'beam_PerformanceTests_ParquetIOIT_HDFS'
   ]
 }.forEach { testJob -> createHDFSFileBasedIOITTestJob(testJob) }

 private void createHDFSFileBasedIOITTestJob(testJob) {
   job(testJob.name) {
     description(testJob.description)
     common.setTopLevelMainJobProperties(delegate)
     common.enablePhraseTriggeringFromPullRequest(delegate, testJob.githubTitle, testJob.githubTriggerPhrase)
     common.setAutoJob(delegate, 'H */6 * * *')
     InfluxDBCredentialsHelper.useCredentials(delegate)
     additionalPipelineArgs = [
       influxDatabase: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
       influxHost: InfluxDBCredentialsHelper.InfluxDBHostUrl,
     ]
     testJob.pipelineOptions.putAll(additionalPipelineArgs)

     String namespace = common.getKubernetesNamespace(testJob.name)
     String kubeconfig = common.getKubeconfigLocationForNamespace(namespace)
     Kubernetes k8s = Kubernetes.create(delegate, kubeconfig, namespace)

     k8s.apply(common.makePathAbsolute("src/.test-infra/kubernetes/hadoop/LargeITCluster/hdfs-multi-datanode-cluster.yml"))
     String hostName = "LOAD_BALANCER_IP"
     k8s.loadBalancerIP("hadoop", hostName)

     Map additionalOptions = [
       runner           : 'DataflowRunner',
       project          : 'apache-beam-testing',
       tempRoot         : 'gs://temp-storage-for-perf-tests',
       hdfsConfiguration: /[{\\\"fs.defaultFS\\\":\\\"hdfs:$${hostName}:9000\\\",\\\"dfs.replication\\\":1}]/,
       filenamePrefix   : "hdfs://\$${hostName}:9000/TEXTIO_IT_"
     ]

     Map allPipelineOptions = testJob.pipelineOptions << additionalOptions
     String runner = "dataflow"
     String filesystem = "hdfs"
     String testTask = ":sdks:java:io:file-based-io-tests:integrationTest"

     steps {
       gradle {
         rootBuildScriptDir(common.checkoutDir)
         common.setGradleSwitches(delegate)
         switches("--info")
         switches("-DintegrationTestPipelineOptions=\'${common.joinPipelineOptions(allPipelineOptions)}\'")
         switches("-Dfilesystem=\'${filesystem}\'")
         switches("-DintegrationTestRunner=\'${runner}\'")
         tasks("${testTask} --tests ${testJob.test}")
       }
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import CommonJobProperties as common
	import InfluxDBCredentialsHelper

	def jobs = [
	[
	name : 'beam_PerformanceTests_TextIOIT',
	description : 'Runs performance tests for TextIOIT',
	test : 'org.apache.beam.sdk.io.text.TextIOIT',
	githubTitle : 'Java TextIO Performance Test',
	githubTriggerPhrase: 'Run Java TextIO Performance Test',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'textioit_results',
	influxMeasurement : 'textioit_results',
	numberOfRecords : '25000000',
	expectedHash : 'f8453256ccf861e8a312c125dfe0e436',
	datasetSize : '1062290000',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]
	],
	[
	name : 'beam_PerformanceTests_Compressed_TextIOIT',
	description : 'Runs performance tests for TextIOIT with GZIP compression',
	test : 'org.apache.beam.sdk.io.text.TextIOIT',
	githubTitle : 'Java CompressedTextIO Performance Test',
	githubTriggerPhrase: 'Run Java CompressedTextIO Performance Test',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'compressed_textioit_results',
	influxMeasurement : 'compressed_textioit_results',
	numberOfRecords : '450000000',
	expectedHash : '8a3de973354abc6fba621c6797cc0f06',
	datasetSize : '1097840000',
	compressionType : 'GZIP',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]
	],
	[
	name : 'beam_PerformanceTests_ManyFiles_TextIOIT',
	description : 'Runs performance tests for TextIOIT with many output files',
	test : 'org.apache.beam.sdk.io.text.TextIOIT',
	githubTitle : 'Java ManyFilesTextIO Performance Test',
	githubTriggerPhrase: 'Run Java ManyFilesTextIO Performance Test',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'many_files_textioit_results',
	influxMeasurement : 'many_files_textioit_results',
	reportGcsPerformanceMetrics: 'true',
	gcsPerformanceMetrics : 'true',
	numberOfRecords : '25000000',
	expectedHash : 'f8453256ccf861e8a312c125dfe0e436',
	datasetSize : '1062290000',
	numberOfShards : '1000',
	numWorkers : '5',
	autoscalingAlgorithm : 'NONE'
	]

	],
	[
	name : 'beam_PerformanceTests_AvroIOIT',
	description : 'Runs performance tests for AvroIOIT',
	test : 'org.apache.beam.sdk.io.avro.AvroIOIT',
	githubTitle : 'Java AvroIO Performance Test',
	githubTriggerPhrase: 'Run Java AvroIO Performance Test',
	pipelineOptions : [
	numberOfRecords : '225000000',
	expectedHash : '2f9f5ca33ea464b25109c0297eb6aecb',
	datasetSize : '1089730000',
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'avroioit_results',
	influxMeasurement : 'avroioit_results',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]
	],
	[
	name : 'beam_PerformanceTests_TFRecordIOIT',
	description : 'Runs performance tests for beam_PerformanceTests_TFRecordIOIT',
	test : 'org.apache.beam.sdk.io.tfrecord.TFRecordIOIT',
	githubTitle : 'Java TFRecordIO Performance Test',
	githubTriggerPhrase: 'Run Java TFRecordIO Performance Test',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'tfrecordioit_results',
	influxMeasurement : 'tfrecordioit_results',
	numberOfRecords : '18000000',
	expectedHash : '543104423f8b6eb097acb9f111c19fe4',
	datasetSize : '1019380000',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]
	],
	[
	name : 'beam_PerformanceTests_XmlIOIT',
	description : 'Runs performance tests for beam_PerformanceTests_XmlIOIT',
	test : 'org.apache.beam.sdk.io.xml.XmlIOIT',
	githubTitle : 'Java XmlIOPerformance Test',
	githubTriggerPhrase: 'Run Java XmlIO Performance Test',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'xmlioit_results',
	influxMeasurement : 'xmlioit_results',
	numberOfRecords : '12000000',
	expectedHash : 'b3b717e7df8f4878301b20f314512fb3',
	datasetSize : '1076590000',
	charset : 'UTF-8',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]
	],
	[
	name : 'beam_PerformanceTests_ParquetIOIT',
	description : 'Runs performance tests for beam_PerformanceTests_ParquetIOIT',
	test : 'org.apache.beam.sdk.io.parquet.ParquetIOIT',
	githubTitle : 'Java ParquetIOPerformance Test',
	githubTriggerPhrase: 'Run Java ParquetIO Performance Test',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'parquetioit_results',
	influxMeasurement : 'parquetioit_results',
	numberOfRecords : '225000000',
	expectedHash : '2f9f5ca33ea464b25109c0297eb6aecb',
	datasetSize : '1087370000',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]
	],
	[
	name : 'beam_PerformanceTests_TextIOIT_HDFS',
	description : 'Runs performance tests for TextIOIT on HDFS',
	test : 'org.apache.beam.sdk.io.text.TextIOIT',
	githubTitle : 'Java TextIO Performance Test on HDFS',
	githubTriggerPhrase: 'Run Java TextIO Performance Test HDFS',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'textioit_hdfs_results',
	influxMeasurement : 'textioit_hdfs_results',
	numberOfRecords : '25000000',
	expectedHash : 'f8453256ccf861e8a312c125dfe0e436',
	datasetSize : '1062290000',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]

	],
	[
	name : 'beam_PerformanceTests_Compressed_TextIOIT_HDFS',
	description : 'Runs performance tests for TextIOIT with GZIP compression on HDFS',
	test : 'org.apache.beam.sdk.io.text.TextIOIT',
	githubTitle : 'Java CompressedTextIO Performance Test on HDFS',
	githubTriggerPhrase: 'Run Java CompressedTextIO Performance Test HDFS',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'compressed_textioit_hdfs_results',
	influxMeasurement : 'compressed_textioit_hdfs_results',
	numberOfRecords : '450000000',
	expectedHash : '8a3de973354abc6fba621c6797cc0f06',
	datasetSize : '1097840000',
	compressionType : 'GZIP',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]
	],
	[
	name : 'beam_PerformanceTests_ManyFiles_TextIOIT_HDFS',
	description : 'Runs performance tests for TextIOIT with many output files on HDFS',
	test : 'org.apache.beam.sdk.io.text.TextIOIT',
	githubTitle : 'Java ManyFilesTextIO Performance Test on HDFS',
	githubTriggerPhrase: 'Run Java ManyFilesTextIO Performance Test HDFS',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'many_files_textioit_hdfs_results',
	influxMeasurement : 'many_files_textioit_hdfs_results',
	reportGcsPerformanceMetrics: 'true',
	gcsPerformanceMetrics : 'true',
	numberOfRecords : '25000000',
	expectedHash : 'f8453256ccf861e8a312c125dfe0e436',
	datasetSize : '1062290000',
	numberOfShards : '1000',
	numWorkers : '5',
	autoscalingAlgorithm : 'NONE'
	]

	],
	[
	name : 'beam_PerformanceTests_AvroIOIT_HDFS',
	description : 'Runs performance tests for AvroIOIT on HDFS',
	test : 'org.apache.beam.sdk.io.avro.AvroIOIT',
	githubTitle : 'Java AvroIO Performance Test on HDFS',
	githubTriggerPhrase: 'Run Java AvroIO Performance Test HDFS',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'avroioit_hdfs_results',
	influxMeasurement : 'avroioit_hdfs_results',
	numberOfRecords : '225000000',
	expectedHash : '2f9f5ca33ea464b25109c0297eb6aecb',
	datasetSize : '1089730000',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]
	],
	[
	name : 'beam_PerformanceTests_TFRecordIOIT_HDFS',
	description : 'Runs performance tests for beam_PerformanceTests_TFRecordIOIT on HDFS',
	test : 'org.apache.beam.sdk.io.tfrecord.TFRecordIOIT',
	githubTitle : 'Java TFRecordIO Performance Test on HDFS',
	githubTriggerPhrase: 'Run Java TFRecordIO Performance Test HDFS',
	pipelineOptions : [
	numberOfRecords : '18000000',
	expectedHash : '543104423f8b6eb097acb9f111c19fe4',
	datasetSize : '1019380000',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]
	],
	[
	name : 'beam_PerformanceTests_XmlIOIT_HDFS',
	description : 'Runs performance tests for beam_PerformanceTests_XmlIOIT on HDFS',
	test : 'org.apache.beam.sdk.io.xml.XmlIOIT',
	githubTitle : 'Java XmlIOPerformance Test on HDFS',
	githubTriggerPhrase: 'Run Java XmlIO Performance Test HDFS',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'xmlioit_hdfs_results',
	influxMeasurement : 'xmlioit_hdfs_results',
	numberOfRecords : '12000000',
	expectedHash : 'b3b717e7df8f4878301b20f314512fb3',
	datasetSize : '1076590000',
	charset : 'UTF-8',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]
	],
	[
	name : 'beam_PerformanceTests_ParquetIOIT_HDFS',
	description : 'Runs performance tests for beam_PerformanceTests_ParquetIOIT on HDFS',
	test : 'org.apache.beam.sdk.io.parquet.ParquetIOIT',
	githubTitle : 'Java ParquetIOPerformance Test on HDFS',
	githubTriggerPhrase: 'Run Java ParquetIO Performance Test HDFS',
	pipelineOptions : [
	bigQueryDataset : 'beam_performance',
	bigQueryTable : 'parquetioit_hdfs_results',
	influxMeasurement : 'parquetioit_hdfs_results',
	numberOfRecords : '225000000',
	expectedHash : '2f9f5ca33ea464b25109c0297eb6aecb',
	datasetSize : '1087370000',
	numWorkers : '5',
	autoscalingAlgorithm: 'NONE'
	]
	]
	]

	jobs.findAll {
	it.name in [
	'beam_PerformanceTests_TextIOIT',
	'beam_PerformanceTests_Compressed_TextIOIT',
	'beam_PerformanceTests_ManyFiles_TextIOIT',
	'beam_PerformanceTests_AvroIOIT',
	'beam_PerformanceTests_TFRecordIOIT',
	'beam_PerformanceTests_XmlIOIT',
	'beam_PerformanceTests_ParquetIOIT'
	]
	}.forEach { testJob -> createGCSFileBasedIOITTestJob(testJob) }

	private void createGCSFileBasedIOITTestJob(testJob) {
	job(testJob.name) {
	description(testJob.description)
	common.setTopLevelMainJobProperties(delegate)
	common.enablePhraseTriggeringFromPullRequest(delegate, testJob.githubTitle, testJob.githubTriggerPhrase)
	common.setAutoJob(delegate, 'H /6 * *')
	InfluxDBCredentialsHelper.useCredentials(delegate)
	additionalPipelineArgs = [
	influxDatabase: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
	influxHost: InfluxDBCredentialsHelper.InfluxDBHostUrl,
	]
	testJob.pipelineOptions.putAll(additionalPipelineArgs)

	def dataflowSpecificOptions = [
	runner : 'DataflowRunner',
	project : 'apache-beam-testing',
	tempRoot : 'gs://temp-storage-for-perf-tests',
	filenamePrefix: "gs://temp-storage-for-perf-tests/${testJob.name}/\${BUILD_ID}/",
	]

	Map allPipelineOptions = dataflowSpecificOptions << testJob.pipelineOptions
	String runner = "dataflow"
	String filesystem = "gcs"
	String testTask = ":sdks:java:io:file-based-io-tests:integrationTest"

	steps {
	gradle {
	rootBuildScriptDir(common.checkoutDir)
	common.setGradleSwitches(delegate)
	switches("--info")
	switches("-DintegrationTestPipelineOptions=\'${common.joinPipelineOptions(allPipelineOptions)}\'")
	switches("-Dfilesystem=\'${filesystem}\'")
	switches("-DintegrationTestRunner=\'${runner}\'")
	tasks("${testTask} --tests ${testJob.test}")
	}
	}
	}
	}

	jobs.findAll {
	it.name in [
	'beam_PerformanceTests_TextIOIT_HDFS',
	'beam_PerformanceTests_Compressed_TextIOIT_HDFS',
	'beam_PerformanceTests_ManyFiles_TextIOIT_HDFS',
	// TODO(BEAM-3945) TFRecord performance test is failing only when running on hdfs.
	// We need to fix this before enabling this job on jenkins.
	//'beam_PerformanceTests_TFRecordIOIT_HDFS',
	'beam_PerformanceTests_AvroIOIT_HDFS',
	'beam_PerformanceTests_XmlIOIT_HDFS',
	'beam_PerformanceTests_ParquetIOIT_HDFS'
	]
	}.forEach { testJob -> createHDFSFileBasedIOITTestJob(testJob) }

	private void createHDFSFileBasedIOITTestJob(testJob) {
	job(testJob.name) {
	description(testJob.description)
	common.setTopLevelMainJobProperties(delegate)
	common.enablePhraseTriggeringFromPullRequest(delegate, testJob.githubTitle, testJob.githubTriggerPhrase)
	common.setAutoJob(delegate, 'H /6 * *')
	InfluxDBCredentialsHelper.useCredentials(delegate)
	additionalPipelineArgs = [
	influxDatabase: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
	influxHost: InfluxDBCredentialsHelper.InfluxDBHostUrl,
	]
	testJob.pipelineOptions.putAll(additionalPipelineArgs)

	String namespace = common.getKubernetesNamespace(testJob.name)
	String kubeconfig = common.getKubeconfigLocationForNamespace(namespace)
	Kubernetes k8s = Kubernetes.create(delegate, kubeconfig, namespace)

	k8s.apply(common.makePathAbsolute("src/.test-infra/kubernetes/hadoop/LargeITCluster/hdfs-multi-datanode-cluster.yml"))
	String hostName = "LOAD_BALANCER_IP"
	k8s.loadBalancerIP("hadoop", hostName)

	Map additionalOptions = [
	runner : 'DataflowRunner',
	project : 'apache-beam-testing',
	tempRoot : 'gs://temp-storage-for-perf-tests',
	hdfsConfiguration: /[{\\\"fs.defaultFS\\\":\\\"hdfs:$${hostName}:9000\\\",\\\"dfs.replication\\\":1}]/,
	filenamePrefix : "hdfs://\$${hostName}:9000/TEXTIO_IT_"
	]

	Map allPipelineOptions = testJob.pipelineOptions << additionalOptions
	String runner = "dataflow"
	String filesystem = "hdfs"
	String testTask = ":sdks:java:io:file-based-io-tests:integrationTest"

	steps {
	gradle {
	rootBuildScriptDir(common.checkoutDir)
	common.setGradleSwitches(delegate)
	switches("--info")
	switches("-DintegrationTestPipelineOptions=\'${common.joinPipelineOptions(allPipelineOptions)}\'")
	switches("-Dfilesystem=\'${filesystem}\'")
	switches("-DintegrationTestRunner=\'${runner}\'")
	tasks("${testTask} --tests ${testJob.test}")
	}
	}
	}
	}