#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This file enumerates the various IOs that are available by default as
# top-level transforms in Beam's YAML.
#
# Note that some transforms may have redundant implementations (more than one
# provider entry for the same transform name). In these cases the specs should
# be kept in sync.
# TODO(yaml): See if this can be enforced programmatically.

# BigQuery
# Declares ReadFromBigQuery and WriteToBigQuery through a `renaming` provider
# that wraps the Java (beamJar) schema transforms named under
# `underlying_provider`.
- type: renaming
  transforms:
    'ReadFromBigQuery': 'ReadFromBigQuery'
    'WriteToBigQuery': 'WriteToBigQuery'
  config:
    # Per-transform option renames.
    # NOTE(review): entries appear to read
    # `<option exposed in YAML>: <parameter of the underlying transform>`
    # (e.g. table -> table_spec); confirm against the renaming provider.
    mappings:
      'ReadFromBigQuery':
        query: 'query'
        table: 'table_spec'
        fields: 'selected_fields'
        row_restriction: 'row_restriction'
      'WriteToBigQuery':
        table: 'table'
        create_disposition: 'create_disposition'
        write_disposition: 'write_disposition'
        error_handling: 'error_handling'
        # TODO(https://github.com/apache/beam/issues/30058): Required until
        # autosharding support is fixed.
        num_streams: 'num_streams'
    underlying_provider:
      type: beamJar
      transforms:
        'ReadFromBigQuery': 'beam:schematransform:org.apache.beam:bigquery_storage_read:v1'
        'WriteToBigQuery': 'beam:schematransform:org.apache.beam:bigquery_write:v1'
      config:
        gradle_target: 'sdks:java:io:google-cloud-platform:expansion-service:shadowJar'
        managed_replacement:
          # The following transforms may be replaced with equivalent managed
          # transforms if the pipeline's 'updateCompatibilityBeamVersion'
          # matches the version given here.
          'ReadFromBigQuery': '2.69.0'
          'WriteToBigQuery': '2.69.0'

# Kafka
# Declares ReadFromKafka and WriteToKafka through a `renaming` provider that
# wraps the Java (beamJar) Kafka schema transforms.
- type: renaming
  transforms:
    'ReadFromKafka': 'ReadFromKafka'
    'WriteToKafka': 'WriteToKafka'
  config:
    # Per-transform option renames. Most options keep their name; note that
    # `consumer_config` is mapped to `consumer_config_updates`.
    mappings:
      'ReadFromKafka':
        'schema': 'schema'
        'consumer_config': 'consumer_config_updates'
        'format': 'format'
        'topic': 'topic'
        'bootstrap_servers': 'bootstrap_servers'
        'confluent_schema_registry_url': 'confluent_schema_registry_url'
        'confluent_schema_registry_subject': 'confluent_schema_registry_subject'
        'auto_offset_reset_config': 'auto_offset_reset_config'
        'error_handling': 'error_handling'
        'file_descriptor_path': 'file_descriptor_path'
        'message_name': 'message_name'
      'WriteToKafka':
        'format': 'format'
        'topic': 'topic'
        'bootstrap_servers': 'bootstrap_servers'
        'producer_config_updates': 'producer_config_updates'
        'error_handling': 'error_handling'
        'file_descriptor_path': 'file_descriptor_path'
        'message_name': 'message_name'
        'schema': 'schema'
    underlying_provider:
      type: beamJar
      transforms:
        'ReadFromKafka': 'beam:schematransform:org.apache.beam:kafka_read:v1'
        'WriteToKafka': 'beam:schematransform:org.apache.beam:kafka_write:v1'
      config:
        gradle_target: 'sdks:java:io:expansion-service:shadowJar'
        managed_replacement:
          # The following transforms may be replaced with equivalent managed
          # transforms if the pipeline's 'updateCompatibilityBeamVersion'
          # matches the version given here.
          'ReadFromKafka': '2.65.0'
          'WriteToKafka': '2.65.0'

# TODO(yaml): Tests currently assume that Python providers are listed before
# Java ones, hence the order below. This should be fixed in the future.

# Python Providers
# Maps each transform name directly to the dotted path of a callable in
# apache_beam.yaml.yaml_io.
- type: python
  transforms:
    'ReadFromBigQuery': 'apache_beam.yaml.yaml_io.read_from_bigquery'
    # Disable until https://github.com/apache/beam/issues/28162 is resolved.
    # 'WriteToBigQuery': 'apache_beam.yaml.yaml_io.write_to_bigquery'
    'ReadFromText': 'apache_beam.yaml.yaml_io.read_from_text'
    'WriteToText': 'apache_beam.yaml.yaml_io.write_to_text'
    'ReadFromPubSub': 'apache_beam.yaml.yaml_io.read_from_pubsub'
    'WriteToPubSub': 'apache_beam.yaml.yaml_io.write_to_pubsub'
    'ReadFromIceberg': 'apache_beam.yaml.yaml_io.read_from_iceberg'
    'WriteToIceberg': 'apache_beam.yaml.yaml_io.write_to_iceberg'
    'ReadFromTFRecord': 'apache_beam.yaml.yaml_io.read_from_tfrecord'
    'WriteToTFRecord': 'apache_beam.yaml.yaml_io.write_to_tfrecord'

# General File Formats
# Declared as a renaming transform to avoid exposing all
# (implementation-specific) pandas arguments and to align with a possible Java
# implementation.
# Invoking these directly as a PyTransform is still an option for anyone
# wanting to use these power-features in a language-dependent manner.
- type: renaming
  transforms:
    'ReadFromCsv': 'ReadFromCsv'
    'WriteToCsv': 'WriteToCsv'
    'ReadFromJson': 'ReadFromJson'
    'WriteToJson': 'WriteToJson'
    'ReadFromParquet': 'ReadFromParquet'
    'WriteToParquet': 'WriteToParquet'
    'ReadFromAvro': 'ReadFromAvro'
    'WriteToAvro': 'WriteToAvro'
  config:
    # Per-transform option renames; only the options listed here are mapped.
    # NOTE(review): entries appear to read
    # `<option exposed in YAML>: <parameter of the underlying transform>`
    # (e.g. delimiter -> sep); confirm against the renaming provider.
    mappings:
      'ReadFromCsv':
        path: 'path'
        delimiter: 'sep'
        comment: 'comment'
        filename_column: 'filename_column'
      'WriteToCsv':
        path: 'path'
        delimiter: 'sep'
      'ReadFromJson':
        path: 'path'
      'WriteToJson':
        path: 'path'
      'ReadFromParquet':
        path: 'file_pattern'
      'WriteToParquet':
        path: 'file_path_prefix'
      'ReadFromAvro':
        path: 'file_pattern'
      'WriteToAvro':
        path: 'file_path_prefix'
    # Fixed values supplied for the Parquet and Avro reads. Written as the
    # canonical lowercase boolean so every YAML loader reads it as a bool.
    defaults:
      'ReadFromParquet':
        as_rows: true
      'ReadFromAvro':
        as_rows: true
    underlying_provider:
      type: python
      transforms:
        'ReadFromCsv': 'apache_beam.io.ReadFromCsv'
        'WriteToCsv': 'apache_beam.io.WriteToCsv'
        'ReadFromJson': 'apache_beam.io.ReadFromJson'
        'WriteToJson': 'apache_beam.io.WriteToJson'
        'ReadFromParquet': 'apache_beam.io.ReadFromParquet'
        'WriteToParquet': 'apache_beam.io.WriteToParquet'
        'ReadFromAvro': 'apache_beam.io.ReadFromAvro'
        'WriteToAvro': 'apache_beam.io.WriteToAvro'

# BeamJar Providers
# Java (beamJar) schema transforms for WriteToCsv and WriteToJson. These two
# names are also declared by the Python-backed file-format entry in this file,
# so the specs should be kept in sync.
- type: beamJar
  transforms:
    'WriteToCsv': 'beam:schematransform:org.apache.beam:csv_write:v1'
    'WriteToJson': 'beam:schematransform:org.apache.beam:json_write:v1'
  config:
    gradle_target: 'sdks:java:extensions:schemaio-expansion-service:shadowJar'

# Databases
# Declares the generic JDBC read/write transforms plus MySQL, Postgres, Oracle
# and SQL Server variants through a `renaming` provider that wraps Java
# (beamJar) schema transforms.
- type: renaming
  transforms:
    'ReadFromJdbc': 'ReadFromJdbc'
    'WriteToJdbc': 'WriteToJdbc'
    'ReadFromMySql': 'ReadFromMySql'
    'WriteToMySql': 'WriteToMySql'
    'ReadFromPostgres': 'ReadFromPostgres'
    'WriteToPostgres': 'WriteToPostgres'
    'ReadFromOracle': 'ReadFromOracle'
    'WriteToOracle': 'WriteToOracle'
    'ReadFromSqlServer': 'ReadFromSqlServer'
    'WriteToSqlServer': 'WriteToSqlServer'
  config:
    # Per-transform option renames.
    # NOTE(review): entries appear to read
    # `<option exposed in YAML>: <parameter of the underlying transform>`
    # (e.g. url -> jdbc_url); confirm against the renaming provider.
    mappings:
      'ReadFromJdbc':
        url: 'jdbc_url'
        connection_init_sql: 'connection_init_sql'
        connection_properties: 'connection_properties'
        disable_auto_commit: 'disable_auto_commit'
        driver_class_name: 'driver_class_name'
        driver_jars: 'driver_jars'
        fetch_size: 'fetch_size'
        output_parallelization: 'output_parallelization'
        password: 'password'
        query: 'read_query'
        table: 'location'
        partition_column: 'partition_column'
        num_partitions: 'num_partitions'
        type: 'jdbc_type'
        username: 'username'
      'WriteToJdbc':
        url: 'jdbc_url'
        auto_sharding: 'autosharding'
        connection_init_sql: 'connection_init_sql'
        connection_properties: 'connection_properties'
        driver_class_name: 'driver_class_name'
        driver_jars: 'driver_jars'
        password: 'password'
        table: 'location'
        batch_size: 'batch_size'
        type: 'jdbc_type'
        username: 'username'
        query: 'write_statement'
      # The database-specific transforms name one of the two generic JDBC
      # mappings above instead of repeating the option list.
      'ReadFromMySql': 'ReadFromJdbc'
      'WriteToMySql': 'WriteToJdbc'
      'ReadFromPostgres': 'ReadFromJdbc'
      'WriteToPostgres': 'WriteToJdbc'
      'ReadFromOracle': 'ReadFromJdbc'
      'WriteToOracle': 'WriteToJdbc'
      'ReadFromSqlServer': 'ReadFromJdbc'
      'WriteToSqlServer': 'WriteToJdbc'
    # Empty-string defaults for the database-specific transforms. The MySQL
    # entries do not blank `connection_init_sql`; every other database does.
    # NOTE(review): presumably these blank out options that the dedicated
    # underlying transforms determine themselves; confirm.
    defaults:
      'ReadFromMySql':
        driver_class_name: ''
        driver_jars: ''
        jdbc_type: ''
      'WriteToMySql':
        driver_class_name: ''
        driver_jars: ''
        jdbc_type: ''
      'ReadFromPostgres':
        connection_init_sql: ''
        driver_class_name: ''
        driver_jars: ''
        jdbc_type: ''
      'WriteToPostgres':
        connection_init_sql: ''
        driver_class_name: ''
        driver_jars: ''
        jdbc_type: ''
      'ReadFromOracle':
        connection_init_sql: ''
        driver_class_name: ''
        driver_jars: ''
        jdbc_type: ''
      'WriteToOracle':
        connection_init_sql: ''
        driver_class_name: ''
        driver_jars: ''
        jdbc_type: ''
      'ReadFromSqlServer':
        connection_init_sql: ''
        driver_class_name: ''
        driver_jars: ''
        jdbc_type: ''
      'WriteToSqlServer':
        connection_init_sql: ''
        driver_class_name: ''
        driver_jars: ''
        jdbc_type: ''
    underlying_provider:
      type: beamJar
      transforms:
        'ReadFromJdbc': 'beam:schematransform:org.apache.beam:jdbc_read:v1'
        'ReadFromMySql': 'beam:schematransform:org.apache.beam:mysql_read:v1'
        'ReadFromPostgres': 'beam:schematransform:org.apache.beam:postgres_read:v1'
        'ReadFromOracle': 'beam:schematransform:org.apache.beam:oracle_read:v1'
        'ReadFromSqlServer': 'beam:schematransform:org.apache.beam:sql_server_read:v1'
        'WriteToJdbc': 'beam:schematransform:org.apache.beam:jdbc_write:v1'
        'WriteToMySql': 'beam:schematransform:org.apache.beam:mysql_write:v1'
        'WriteToPostgres': 'beam:schematransform:org.apache.beam:postgres_write:v1'
        'WriteToOracle': 'beam:schematransform:org.apache.beam:oracle_write:v1'
        'WriteToSqlServer': 'beam:schematransform:org.apache.beam:sql_server_write:v1'
      config:
        gradle_target: 'sdks:java:extensions:schemaio-expansion-service:shadowJar'

# Spanner
# Declares ReadFromSpanner and WriteToSpanner through a `renaming` provider
# that wraps the Java (beamJar) Spanner schema transforms.
- type: renaming
  transforms:
    'ReadFromSpanner': 'ReadFromSpanner'
    'WriteToSpanner': 'WriteToSpanner'
  config:
    # Per-transform option renames; the short names project / instance /
    # database / table are paired with the corresponding `*_id` names.
    mappings:
      'ReadFromSpanner':
        project: 'project_id'
        instance: 'instance_id'
        database: 'database_id'
        table: 'table_id'
        query: 'query'
        columns: 'columns'
        index: 'index'
        batching: 'batching'
        error_handling: 'error_handling'
      'WriteToSpanner':
        project: 'project_id'
        instance: 'instance_id'
        database: 'database_id'
        table: 'table_id'
        error_handling: 'error_handling'
    underlying_provider:
      type: beamJar
      transforms:
        'ReadFromSpanner': 'beam:schematransform:org.apache.beam:spanner_read:v1'
        'WriteToSpanner': 'beam:schematransform:org.apache.beam:spanner_write:v1'
      config:
        gradle_target: 'sdks:java:io:google-cloud-platform:expansion-service:shadowJar'

# TFRecord
# Java (beamJar) backed ReadFromTFRecord / WriteToTFRecord. The same transform
# names are also declared by the Python provider entry in this file, so the
# specs should be kept in sync.
- type: renaming
  transforms:
    'ReadFromTFRecord': 'ReadFromTFRecord'
    'WriteToTFRecord': 'WriteToTFRecord'
  config:
    # Per-transform option renames.
    # NOTE(review): entries appear to read
    # `<option exposed in YAML>: <parameter of the underlying transform>`
    # (e.g. compression_type -> compression); confirm against the provider.
    mappings:
      'ReadFromTFRecord':
        file_pattern: 'file_pattern'
        compression_type: 'compression'
        validate: 'validate'
      'WriteToTFRecord':
        file_path_prefix: 'output_prefix'
        shard_name_template: 'shard_template'
        file_name_suffix: 'filename_suffix'
        num_shards: 'num_shards'
        compression_type: 'compression'
    underlying_provider:
      type: beamJar
      transforms:
        'ReadFromTFRecord': 'beam:schematransform:org.apache.beam:tfrecord_read:v1'
        'WriteToTFRecord': 'beam:schematransform:org.apache.beam:tfrecord_write:v1'
      config:
        gradle_target: 'sdks:java:io:expansion-service:shadowJar'

# BigTable
# Declares ReadFromBigTable and WriteToBigTable through a `renaming` provider
# that wraps the Java (beamJar) Bigtable schema transforms.
- type: renaming
  transforms:
    'ReadFromBigTable': 'ReadFromBigTable'
    'WriteToBigTable': 'WriteToBigTable'
  config:
    mappings:
      # NOTE(review): a comment here previously said the BigTable read IO was
      # temporarily removed, yet ReadFromBigTable is declared and mapped in
      # this entry. Confirm that the read transform is meant to be enabled.
      'ReadFromBigTable':
        project: 'project_id'
        instance: 'instance_id'
        table: 'table_id'
        flatten: 'flatten'
      'WriteToBigTable':
        project: 'project_id'
        instance: 'instance_id'
        table: 'table_id'
    underlying_provider:
      type: beamJar
      transforms:
        'ReadFromBigTable': 'beam:schematransform:org.apache.beam:bigtable_read:v1'
        'WriteToBigTable': 'beam:schematransform:org.apache.beam:bigtable_write:v1'
      config:
        gradle_target: 'sdks:java:io:google-cloud-platform:expansion-service:shadowJar'

# IcebergCDC
# Declares ReadFromIcebergCDC through a `renaming` provider that wraps the
# Java (beamJar) Iceberg CDC read schema transform. Every option is mapped to
# an identically named parameter.
- type: renaming
  transforms:
    'ReadFromIcebergCDC': 'ReadFromIcebergCDC'
  config:
    mappings:
      'ReadFromIcebergCDC':
        table: 'table'
        catalog_name: 'catalog_name'
        catalog_properties: 'catalog_properties'
        config_properties: 'config_properties'
        drop: 'drop'
        filter: 'filter'
        from_snapshot: 'from_snapshot'
        from_timestamp: 'from_timestamp'
        keep: 'keep'
        poll_interval_seconds: 'poll_interval_seconds'
        starting_strategy: 'starting_strategy'
        streaming: 'streaming'
        to_snapshot: 'to_snapshot'
        to_timestamp: 'to_timestamp'
    underlying_provider:
      type: beamJar
      transforms:
        'ReadFromIcebergCDC': 'beam:schematransform:org.apache.beam:iceberg_cdc_read:v1'
      config:
        gradle_target: 'sdks:java:io:expansion-service:shadowJar'