| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
| |
| <modelVersion>4.0.0</modelVersion> |
| |
| <!-- Parent POM of this module, looked up one directory above via relativePath. --> |
| <parent> |
| <groupId>org.apache.beam</groupId> |
| <artifactId>beam-sdks-java-io-parent</artifactId> |
| <version>2.3.0</version> |
| <relativePath>../pom.xml</relativePath> |
| </parent> |
| |
| <!-- Only the artifactId is declared here; no groupId or version element is given for this |
| module, so Maven takes both from the parent declared above. --> |
| <artifactId>beam-sdks-java-io-file-based-io-tests</artifactId> |
| <name>Apache Beam :: SDKs :: Java :: IO :: File-based-io-tests</name> |
| <description>Integration tests for reading/writing using file-based sources/sinks.</description> |
| |
| <profiles> |
| <profile> |
| <!-- Puts the Google Cloud Dataflow runner on the runtime classpath. |
| Switched on by passing -DintegrationTestRunner=dataflow. --> |
| <id>dataflow-runner</id> |
| <activation> |
| <property><name>integrationTestRunner</name><value>dataflow</value></property> |
| </activation> |
| <dependencies> |
| <dependency> |
| <groupId>org.apache.beam</groupId> |
| <artifactId>beam-runners-google-cloud-dataflow-java</artifactId> |
| <scope>runtime</scope> |
| </dependency> |
| </dependencies> |
| </profile> |
| |
| <!-- |
| This profile invokes PerfKitBenchmarker, which does benchmarking of |
| the IO ITs. The arguments passed to it allow it to invoke mvn again |
| with the desired benchmark. |
| |
| To invoke this, run: |
| |
| mvn verify -Dio-it-suite -pl sdks/java/io/file-based-io-tests \ |
| -DpkbLocation="path-to-pkb.py" \ |
| -DintegrationTestPipelineOptions='["––numberOfRecords=100000"]' \ |
| -DfileBasedIoItClass=file-based IO IT class, eg. org.apache.beam.sdk.io.text.TextIOIT |
| |
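| Note: XML comments cannot contain two consecutive hyphens, so the pipeline options above are |
| shown with en dashes. Type two ASCII hyphens in front of each option when running the command. |
| |
| For DirectRunner, please use -DforceDirectRunner=true argument |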
| For other runners please check doc in BEAM-3060 and https://beam.apache.org/documentation/io/testing/ |
| --> |
| <profile> |
| <id>io-it-suite</id> |
| <!-- No value is given for the property, so the profile is active whenever io-it-suite is defined. --> |
| <activation> |
| <property><name>io-it-suite</name></property> |
| </activation> |
| <properties> |
| <!-- This is based on the location of the current pom relative to the root |
| See discussion in BEAM-2460 --> |
| <beamRootProjectDir>${project.parent.parent.parent.parent.basedir}</beamRootProjectDir> |
| </properties> |
| <build> |
| <plugins> |
| <!-- Runs findSupportedPython.groovy from the Beam root during the initialize phase. |
| NOTE(review): presumably this script sets python.interpreter.bin, which the exec |
| plugin below uses as its executable; confirm against the script. --> |
| <plugin> |
| <groupId>org.codehaus.gmaven</groupId> |
| <artifactId>groovy-maven-plugin</artifactId> |
| <version>${groovy-maven-plugin.version}</version> |
| <executions> |
| <execution> |
| <id>find-supported-python-for-compile</id> |
| <phase>initialize</phase> |
| <goals> |
| <goal>execute</goal> |
| </goals> |
| <configuration> |
| <source>${beamRootProjectDir}/sdks/python/findSupportedPython.groovy</source> |
| </configuration> |
| </execution> |
| </executions> |
| </plugin> |
| |
| <!-- During the verify phase, starts the Python interpreter with the PerfKitBenchmarker |
| script as its first argument, followed by the benchmark flags listed below. --> |
| <plugin> |
| <groupId>org.codehaus.mojo</groupId> |
| <artifactId>exec-maven-plugin</artifactId> |
| <version>${maven-exec-plugin.version}</version> |
| <executions> |
| <execution> |
| <phase>verify</phase> |
| <goals> |
| <goal>exec</goal> |
| </goals> |
| </execution> |
| </executions> |
| <configuration> |
| <executable>${python.interpreter.bin}</executable> |
| <arguments> |
| <!-- script to run: the path given with -DpkbLocation --> |
| <argument>${pkbLocation}</argument> |
| <argument>-benchmarks=beam_integration_benchmark</argument> |
| <argument>-beam_it_profile=io-it</argument> |
| <!-- pkbTimeout is not defined in this file; NOTE(review): presumably inherited from a parent POM --> |
| <argument>-beam_it_timeout=${pkbTimeout}</argument> |
| <argument>-beam_location=${beamRootProjectDir}</argument> |
| <argument>-beam_prebuilt=true</argument> |
| <argument>-beam_sdk=java</argument> |
| <!-- runner overrides, controlled via forceDirectRunner --> |
| <argument>${pkbBeamRunnerProfile}</argument> |
| <argument>${pkbBeamRunnerOption}</argument> |
| <!-- specific to this IO --> |
| <argument>-beam_it_module=sdks/java/io/file-based-io-tests</argument> |
| <argument>-beam_it_class=${fileBasedIoItClass}</argument> |
| <!-- arguments typically defined by user --> |
| <argument>-beam_it_options=${integrationTestPipelineOptions}</argument> |
| <!-- |
| optional array of key=value items. It will be passed to |
| target mvn command by pkb. eg. -DpkbExtraProperties='["filesystem=local"]' |
| --> |
| <argument>-beam_extra_mvn_properties=${pkbExtraProperties}</argument> |
| </arguments> |
| </configuration> |
| </plugin> |
| <!-- Surefire is told to skip tests while this profile is active. --> |
| <plugin> |
| <groupId>org.apache.maven.plugins</groupId> |
| <artifactId>maven-surefire-plugin</artifactId> |
| <version>${surefire-plugin.version}</version> |
| <configuration> |
| <skipTests>true</skipTests> |
| </configuration> |
| </plugin> |
| </plugins> |
| </build> |
| </profile> |
| |
| <!-- |
| This profile invokes PerfKitBenchmarker, which does benchmarking of |
| the IO ITs with HDFS filesystem. The arguments passed to it allow it |
| to invoke mvn again with the desired benchmark. |
| |
| To invoke this, run: |
| |
| mvn verify -Dio-it-suite-hdfs-small -pl sdks/java/io/file-based-io-tests \ |
| -DpkbLocation="path-to-pkb.py" \ |
| -DintegrationTestPipelineOptions='["––numberOfRecords=100000", \ |
| "––tempRoot=gs://bucket-name/"]' \ |
| -DpkbExtraProperties='["filesystem=hdfs"]' \ |
| -DfileBasedIoItClass=file-based IO IT class, eg. org.apache.beam.sdk.io.text.TextIOIT |
| |
| For DirectRunner, please use -DforceDirectRunner=true argument and check |
| .test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml for info about necessary modifications. |
| Line containing argument beam_kubernetes_scripts must be commented out, because test infrastructure |
| when testing on DirectRunner must be created manually using provided scripts. |
| |
| For other runners please check doc in BEAM-3060 and https://beam.apache.org/documentation/io/testing/ |
| --> |
| <profile> |
| <!-- The profile id and its activation property differ: the profile is switched on by |
| defining io-it-suite-hdfs-small (e.g. -Dio-it-suite-hdfs-small), not by |
| -Dio-it-hdfs-small. It can still be selected by id with -P. --> |
| <id>io-it-hdfs-small</id> |
| <activation> |
| <property><name>io-it-suite-hdfs-small</name></property> |
| </activation> |
| <properties> |
| <!-- This is based on the location of the current pom relative to the root |
| See discussion in BEAM-2460 --> |
| <beamRootProjectDir>${project.parent.parent.parent.parent.basedir}</beamRootProjectDir> |
| </properties> |
| <build> |
| <plugins> |
| <!-- Runs findSupportedPython.groovy from the Beam root during the initialize phase. |
| NOTE(review): presumably this script sets python.interpreter.bin, which the exec |
| plugin below uses as its executable; confirm against the script. --> |
| <plugin> |
| <groupId>org.codehaus.gmaven</groupId> |
| <artifactId>groovy-maven-plugin</artifactId> |
| <version>${groovy-maven-plugin.version}</version> |
| <executions> |
| <execution> |
| <id>find-supported-python-for-compile</id> |
| <phase>initialize</phase> |
| <goals> |
| <goal>execute</goal> |
| </goals> |
| <configuration> |
| <source>${beamRootProjectDir}/sdks/python/findSupportedPython.groovy</source> |
| </configuration> |
| </execution> |
| </executions> |
| </plugin> |
| |
| <!-- During the verify phase, starts the Python interpreter with the PerfKitBenchmarker |
| script as its first argument, followed by the benchmark flags listed below. --> |
| <plugin> |
| <groupId>org.codehaus.mojo</groupId> |
| <artifactId>exec-maven-plugin</artifactId> |
| <version>${maven-exec-plugin.version}</version> |
| <executions> |
| <execution> |
| <phase>verify</phase> |
| <goals> |
| <goal>exec</goal> |
| </goals> |
| </execution> |
| </executions> |
| <configuration> |
| <executable>${python.interpreter.bin}</executable> |
| <arguments> |
| <!-- script to run: the path given with -DpkbLocation --> |
| <argument>${pkbLocation}</argument> |
| <argument>-benchmarks=beam_integration_benchmark</argument> |
| <argument>-beam_it_profile=io-it</argument> |
| <!-- Same timeout flag the io-it-suite profile passes, so both suites honour pkbTimeout. |
| pkbTimeout is not defined in this file; NOTE(review): presumably inherited from a |
| parent POM, as for the io-it-suite profile. --> |
| <argument>-beam_it_timeout=${pkbTimeout}</argument> |
| <argument>-beam_location=${beamRootProjectDir}</argument> |
| <argument>-beam_prebuilt=true</argument> |
| <argument>-beam_sdk=java</argument> |
| <!-- Kubernetes access used by pkb; kubeconfig and kubectl are not defined in this file. --> |
| <argument>-kubeconfig=${kubeconfig}</argument> |
| <argument>-kubectl=${kubectl}</argument> |
| <!-- runner overrides, controlled via forceDirectRunner --> |
| <argument>${pkbBeamRunnerProfile}</argument> |
| <argument>${pkbBeamRunnerOption}</argument> |
| <!-- specific to this IO --> |
| <argument>-beam_it_module=sdks/java/io/file-based-io-tests</argument> |
| <argument>-beam_it_class=${fileBasedIoItClass}</argument> |
| <!-- arguments typically defined by user --> |
| <argument>-beam_it_options=${integrationTestPipelineOptions}</argument> |
| <!-- configuration file and Kubernetes scripts of the small HDFS test cluster --> |
| <argument>-beam_options_config_file=${beamRootProjectDir}/.test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml</argument> |
| <argument>-beam_kubernetes_scripts=${beamRootProjectDir}/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster.yml,${beamRootProjectDir}/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster-for-local-dev.yml</argument> |
| <!-- |
| optional array of key=value items. It will be passed to |
| target mvn command by pkb. eg. -DpkbExtraProperties='["filesystem=local"]' |
| --> |
| <argument>-beam_extra_mvn_properties=${pkbExtraProperties}</argument> |
| </arguments> |
| </configuration> |
| </plugin> |
| |
| <!-- Surefire is told to skip tests while this profile is active. --> |
| <plugin> |
| <groupId>org.apache.maven.plugins</groupId> |
| <artifactId>maven-surefire-plugin</artifactId> |
| <version>${surefire-plugin.version}</version> |
| <configuration> |
| <skipTests>true</skipTests> |
| </configuration> |
| </plugin> |
| </plugins> |
| </build> |
| </profile> |
| <profile> |
| <!-- Google Cloud Storage support (gs:// scheme): adds the google-cloud-platform IO module |
| to the runtime classpath. Switched on by passing -Dfilesystem=gcs. --> |
| <id>google-cloud-storage</id> |
| <activation> |
| <property><name>filesystem</name><value>gcs</value></property> |
| </activation> |
| <dependencies> |
| <dependency> |
| <groupId>org.apache.beam</groupId> |
| <artifactId>beam-sdks-java-io-google-cloud-platform</artifactId> |
| <scope>runtime</scope> |
| </dependency> |
| </dependencies> |
| </profile> |
| <profile> |
| <!-- HDFS support (hdfs:// scheme): adds the Beam Hadoop file system module and the Hadoop |
| client libraries to the runtime classpath. Switched on by passing -Dfilesystem=hdfs. --> |
| <id>hadoop-distributed-file-system</id> |
| <activation> |
| <property><name>filesystem</name><value>hdfs</value></property> |
| </activation> |
| <dependencies> |
| <!-- Beam module providing the file system implementation --> |
| <dependency> |
| <groupId>org.apache.beam</groupId> |
| <artifactId>beam-sdks-java-io-hadoop-file-system</artifactId> |
| <scope>runtime</scope> |
| </dependency> |
| <!-- Hadoop libraries, both pinned to apache.hadoop.version --> |
| <dependency> |
| <groupId>org.apache.hadoop</groupId> |
| <artifactId>hadoop-hdfs</artifactId> |
| <version>${apache.hadoop.version}</version> |
| <scope>runtime</scope> |
| </dependency> |
| <dependency> |
| <groupId>org.apache.hadoop</groupId> |
| <artifactId>hadoop-client</artifactId> |
| <version>${apache.hadoop.version}</version> |
| <scope>runtime</scope> |
| </dependency> |
| </dependencies> |
| </profile> |
| </profiles> |
| |
| <properties> |
| <!-- Version applied to the hadoop-hdfs and hadoop-client dependencies of the |
| hadoop-distributed-file-system profile. --> |
| <apache.hadoop.version>2.7.1</apache.hadoop.version> |
| </properties> |
| |
| <!-- Every dependency of this module is test-scoped. No versions are given here; |
| NOTE(review): presumably they are managed by a parent POM. --> |
| <dependencies> |
| <!-- Beam SDK and the default (direct) runner --> |
| <dependency> |
| <groupId>org.apache.beam</groupId> |
| <artifactId>beam-sdks-java-core</artifactId> |
| <scope>test</scope> |
| </dependency> |
| <dependency> |
| <groupId>org.apache.beam</groupId> |
| <artifactId>beam-runners-direct-java</artifactId> |
| <scope>test</scope> |
| </dependency> |
| <!-- General-purpose and testing libraries --> |
| <dependency> |
| <groupId>com.google.guava</groupId> |
| <artifactId>guava</artifactId> |
| <scope>test</scope> |
| </dependency> |
| <dependency> |
| <groupId>junit</groupId> |
| <artifactId>junit</artifactId> |
| <scope>test</scope> |
| </dependency> |
| <dependency> |
| <groupId>org.hamcrest</groupId> |
| <artifactId>hamcrest-all</artifactId> |
| <scope>test</scope> |
| </dependency> |
| <!-- Shared IO module: its tests-classifier artifact first, then the main artifact --> |
| <dependency> |
| <groupId>org.apache.beam</groupId> |
| <artifactId>beam-sdks-java-io-common</artifactId> |
| <classifier>tests</classifier> |
| <scope>test</scope> |
| </dependency> |
| <dependency> |
| <groupId>org.apache.beam</groupId> |
| <artifactId>beam-sdks-java-io-common</artifactId> |
| <scope>test</scope> |
| </dependency> |
| <!-- Avro library --> |
| <dependency> |
| <groupId>org.apache.avro</groupId> |
| <artifactId>avro</artifactId> |
| <scope>test</scope> |
| </dependency> |
| </dependencies> |
| </project> |