# blob: b9802f11b6cf773d221332ce3b9b36639d0ec201 [file]
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# NOTE: This file is autogenerated and should not be edited by hand.
# Configs are generated based on the expansion service
# configuration in /sdks/standard_expansion_services.yaml.
# Refer to gen_xlang_wrappers.py for more info.
#
# Last updated on: 2026-06-11
# DatadogWrite: wrapper config for the transform identified by
# beam:schematransform:org.apache.beam:datadog_write:v1, with
# default_service sdks:java:io:expansion-service:shadowJar and Python
# destination apache_beam/io.
# Fields marked nullable: false are api_key and url; the remaining fields
# (batch_count, error_handling, max_buffer_size, min_batch_count, parallelism)
# are nullable.
# NOTE(review): the transform-level description is an empty string. Because
# this file is autogenerated, a description would presumably have to be added
# at the source and the file regenerated -- confirm.
- default_service: sdks:java:io:expansion-service:shadowJar
description: ''
destinations:
python: apache_beam/io
fields:
- description: The Datadog API key.
name: api_key
nullable: false
type: str
- description: The number of events to batch together for each write.
name: batch_count
nullable: true
type: int32
- description: Specifies how to handle errors.
name: error_handling
nullable: true
type: Row(output=<class 'str'>)
- description: The maximum buffer size in bytes.
name: max_buffer_size
nullable: true
type: int64
- description: The minimum number of events to batch together for each write.
name: min_batch_count
nullable: true
type: int32
- description: The degree of parallelism for writing.
name: parallelism
nullable: true
type: int32
- description: The Datadog API URL.
name: url
nullable: false
type: str
identifier: beam:schematransform:org.apache.beam:datadog_write:v1
name: DatadogWrite
# GenerateSequence: wrapper config for the transform identified by
# beam:schematransform:org.apache.beam:generate_sequence:v1, with
# default_service sdks:java:io:expansion-service:shadowJar and Python
# destination apache_beam/io.
# Only start is marked nullable: false; end and rate are nullable.
# The rate field is typed as a Row with an int64 "elements" member and an
# optional int64 "seconds" member.
- default_service: sdks:java:io:expansion-service:shadowJar
description: 'Outputs a PCollection of Beam Rows, each containing a single INT64
number called "value". The count is produced from the given "start" value and
either up to the given "end" or until 2^63 - 1.
To produce an unbounded PCollection, simply do not specify an "end" value. Unbounded
sequences can specify a "rate" for output elements.
In all cases, the sequence of numbers is generated in parallel, so there is no
inherent ordering between the generated values'
destinations:
python: apache_beam/io
fields:
- description: The maximum number to generate (exclusive). Will be an unbounded
sequence if left unspecified.
name: end
nullable: true
type: int64
- description: Specifies the rate to generate a given number of elements per a given
number of seconds. Applicable only to unbounded sequences.
name: rate
nullable: true
type: Row(elements=<class 'int64'>, seconds=typing.Optional[int64])
- description: The minimum number to generate (inclusive).
name: start
nullable: false
type: int64
identifier: beam:schematransform:org.apache.beam:generate_sequence:v1
name: GenerateSequence
# MongodbWrite: wrapper config for the transform identified by
# beam:schematransform:org.apache.beam:mongodb_write:v1, with
# default_service sdks:java:io:expansion-service:shadowJar and Python
# destination apache_beam/io.
# Fields marked nullable: false are collection, database and uri; batch_size
# and error_handling are nullable.
# NOTE(review): the transform-level description is an empty string; only the
# individual fields carry documentation.
- default_service: sdks:java:io:expansion-service:shadowJar
description: ''
destinations:
python: apache_beam/io
fields:
- description: The number of documents to include in each batch write.
name: batch_size
nullable: true
type: int64
- description: The MongoDB collection to write to.
name: collection
nullable: false
type: str
- description: The MongoDB database to write to.
name: database
nullable: false
type: str
- description: 'This option specifies whether and where to output unwritable rows.
Note: Error handling is currently limited to data conversion failures before
sending to the MongoDB driver, as the underlying MongoDbIO does not yet support
dead-letter queues for write failures.'
name: error_handling
nullable: true
type: Row(output=<class 'str'>)
- description: The connection URI for the MongoDB server.
name: uri
nullable: false
type: str
identifier: beam:schematransform:org.apache.beam:mongodb_write:v1
name: MongodbWrite
# TfrecordRead: wrapper config for the transform identified by
# beam:schematransform:org.apache.beam:tfrecord_read:v1, with
# default_service sdks:java:io:expansion-service:shadowJar and Python
# destination apache_beam/io.
# Fields marked nullable: false are compression, file_pattern and validate;
# error_handling is the only nullable field.
# NOTE(review): the transform-level description is an empty string.
- default_service: sdks:java:io:expansion-service:shadowJar
description: ''
destinations:
python: apache_beam/io
fields:
- description: Decompression type to use when reading input files.
name: compression
nullable: false
type: str
# NOTE(review): "unwritable rows" is the same wording used by the write
# transforms in this file; for a read transform it presumably means rows that
# could not be read or converted -- confirm and fix at the source if so.
- description: This option specifies whether and where to output unwritable rows.
name: error_handling
nullable: true
type: Row(output=<class 'str'>)
- description: Filename or file pattern used to find input files.
name: file_pattern
nullable: false
type: str
- description: Validate file pattern.
name: validate
nullable: false
type: boolean
identifier: beam:schematransform:org.apache.beam:tfrecord_read:v1
name: TfrecordRead
# TfrecordWrite: wrapper config for the transform identified by
# beam:schematransform:org.apache.beam:tfrecord_write:v1, with
# default_service sdks:java:io:expansion-service:shadowJar and Python
# destination apache_beam/io.
# Fields marked nullable: false are compression, num_shards and
# output_prefix; the remaining fields are nullable.
# NOTE(review): the transform-level description is an empty string.
- default_service: sdks:java:io:expansion-service:shadowJar
description: ''
destinations:
python: apache_beam/io
fields:
# NOTE(review): the description states a default of NONE, yet the field is
# marked nullable: false -- confirm whether callers must always supply it.
- description: Option to indicate the output sink's compression type. Default is
NONE.
name: compression
nullable: false
type: str
- description: This option specifies whether and where to output unwritable rows.
name: error_handling
nullable: true
type: Row(output=<class 'str'>)
- description: The suffix of each file written, combined with prefix and shardTemplate.
name: filename_suffix
nullable: true
type: str
- description: Maximum number of writers created in a bundle before spilling to
shuffle.
name: max_num_writers_per_bundle
nullable: true
type: int32
# NOTE(review): the description below refers to maxNumWritersPerBundle, which
# presumably corresponds to the max_num_writers_per_bundle field above.
- description: Whether to skip the spilling of data caused by having maxNumWritersPerBundle.
name: no_spilling
nullable: true
type: boolean
- description: The number of shards to use, or 0 for automatic.
name: num_shards
nullable: false
type: int32
- description: The directory to which files will be written.
name: output_prefix
nullable: false
type: str
- description: The shard template of each file written, combined with prefix and
suffix.
name: shard_template
nullable: true
type: str
identifier: beam:schematransform:org.apache.beam:tfrecord_write:v1
name: TfrecordWrite
# ReadFromMqtt: wrapper config for the transform identified by
# beam:schematransform:org.apache.beam:mqtt_read:v1. Unlike the entries that
# use sdks:java:io:expansion-service:shadowJar, its default_service is
# sdks:java:io:messaging-expansion-service:shadowJar. Python destination is
# apache_beam/io.
# Only connection_configuration is marked nullable: false; within that Row,
# server_uri is the only member not typed as typing.Optional.
# NOTE(review): the description refers to `maxNumRecords` and
# `maxReadTimeSeconds`, while the fields listed here are named max_num_records
# and max_read_time_seconds -- presumably the same options; confirm upstream.
- default_service: sdks:java:io:messaging-expansion-service:shadowJar
description: 'Reads messages from an MQTT broker and outputs each payload as a single
`bytes` field.
By default the read is unbounded (streaming): it keeps consuming messages from
the subscribed topic until the pipeline is stopped. Setting `maxNumRecords` and/or
`maxReadTimeSeconds` bounds the read, producing a bounded (batch) PCollection.
Note: streaming reads require a runner that supports portable streaming (e.g.
Prism, Flink, or Dataflow). The legacy local Python DirectRunner does not execute
portable streaming cross-language reads.'
destinations:
python: apache_beam/io
fields:
- description: Configuration options to set up the MQTT connection.
name: connection_configuration
nullable: false
type: Row(client_id=typing.Optional[str], password=typing.Optional[str], server_uri=<class
'str'>, topic=typing.Optional[str], username=typing.Optional[str])
- description: The max number of records to receive. Setting this will result in
a bounded PCollection.
name: max_num_records
nullable: true
type: int64
- description: The maximum time for this source to read messages. Setting this will
result in a bounded PCollection.
name: max_read_time_seconds
nullable: true
type: int64
identifier: beam:schematransform:org.apache.beam:mqtt_read:v1
name: ReadFromMqtt
# WriteToMqtt: wrapper config for the transform identified by
# beam:schematransform:org.apache.beam:mqtt_write:v1, with default_service
# sdks:java:io:messaging-expansion-service:shadowJar and Python destination
# apache_beam/io.
# connection_configuration is marked nullable: false and has the Row type
# with server_uri as its only member not typed as typing.Optional; retained
# is nullable.
- default_service: sdks:java:io:messaging-expansion-service:shadowJar
description: 'Publishes messages to an MQTT broker. Expects an input PCollection
of rows with a single `bytes` field, each of which is published as one MQTT message.
Works with both bounded (batch) and unbounded (streaming) input PCollections.'
destinations:
python: apache_beam/io
fields:
- description: Configuration options to set up the MQTT connection.
name: connection_configuration
nullable: false
type: Row(client_id=typing.Optional[str], password=typing.Optional[str], server_uri=<class
'str'>, topic=typing.Optional[str], username=typing.Optional[str])
- description: Whether or not the publish message should be retained by the messaging
engine. When a subscriber connects, it gets the latest retained message. Defaults
to `False`, which will clear the retained message from the server.
name: retained
nullable: true
type: boolean
identifier: beam:schematransform:org.apache.beam:mqtt_write:v1
name: WriteToMqtt