---
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Catalogue of I/O connectors, grouped into categories. Each category has a
# name, a description, and a list of rows. Each row names a transform,
# describes it (Markdown links allowed), optionally points to a docs page, and
# lists one or more implementations (language, name, url).
categories:
# Connectors that read from or write to files.
- name: File-based
  description: These I/O connectors involve working with files.
  rows:
  - transform: FileIO
    description: "General-purpose transforms for working with files: listing files (matching), reading and writing."
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.FileIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/FileIO.html
    - language: py
      # Python entries are named after the module, matching the pydoc URL.
      name: apache_beam.io.fileio
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.fileio.html
  - transform: AvroIO
    description: PTransforms for reading from and writing to [Avro](https://avro.apache.org/) files.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.AvroIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/AvroIO.html
    - language: py
      name: apache_beam.io.avroio
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.avroio.html
    - language: go
      name: github.com/apache/beam/sdks/go/pkg/beam/io/avroio
      url: https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam/io/avroio
  - transform: TextIO
    description: PTransforms for reading and writing text files.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.TextIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/TextIO.html
    - language: py
      name: apache_beam.io.textio
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html
    - language: go
      name: github.com/apache/beam/sdks/go/pkg/beam/io/textio
      url: https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam/io/textio
  - transform: TFRecordIO
    description: PTransforms for reading and writing [TensorFlow TFRecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) files.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.TFRecordIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/TFRecordIO.html
    - language: py
      name: apache_beam.io.tfrecordio
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.tfrecordio.html
  - transform: XmlIO
    description: Transforms for reading and writing XML files using [JAXB](https://www.oracle.com/technical-resources/articles/javase/jaxb.html) mappers.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.xml.XmlIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/xml/XmlIO.html
  - transform: TikaIO
    description: Transforms for parsing arbitrary files using [Apache Tika](https://tika.apache.org/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.tika.TikaIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/tika/TikaIO.html
  - transform: ParquetIO
    description: IO for reading from and writing to [Parquet](https://parquet.apache.org/) files.
    docs: /documentation/io/built-in/parquet/
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.parquet.ParquetIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/parquet/ParquetIO.html
    - language: py
      name: apache_beam.io.parquetio
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.parquetio.html
  - transform: ThriftIO
    description: PTransforms for reading and writing files containing [Thrift](https://thrift.apache.org/)-encoded data.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.thrift.ThriftIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/thrift/ThriftIO.html
  - transform: VcfIO
    description: A source for reading from [VCF files](https://samtools.github.io/hts-specs/VCFv4.2.pdf) (version 4.x).
    implementations:
    - language: py
      name: apache_beam.io.vcfio
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.vcfio.html
  - transform: S3IO
    description: A source for reading from and writing to [Amazon S3](https://aws.amazon.com/s3/).
    implementations:
    - language: py
      name: apache_beam.io.aws.s3io
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.aws.s3io.html
  - transform: GcsIO
    description: A source for reading from and writing to [Google Cloud Storage](https://cloud.google.com/storage).
    implementations:
    - language: py
      name: apache_beam.io.gcp.gcsio
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.gcsio.html
# Connectors implemented as FileSystem implementations. Each row lists the
# per-language implementation (language, name, url) of one file system.
- name: FileSystem
  description: Beam provides a file system interface that defines APIs for writing file-system-agnostic code. Several I/O connectors are implemented as a FileSystem implementation.
  rows:
  - transform: HadoopFileSystem
    description: "`FileSystem` implementation for accessing [Hadoop](https://hadoop.apache.org/) Distributed File System files."
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.hdfs.HadoopFileSystemRegistrar
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/hdfs/HadoopFileSystemRegistrar.html
    - language: py
      name: apache_beam.io.hadoopfilesystem
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.hadoopfilesystem.html
  - transform: GcsFileSystem
    description: "`FileSystem` implementation for [Google Cloud Storage](https://cloud.google.com/storage)."
    implementations:
    - language: java
      name: org.apache.beam.sdk.extensions.gcp.storage.GcsFileSystemRegistrar
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/extensions/gcp/storage/GcsFileSystemRegistrar.html
    - language: py
      name: apache_beam.io.gcp.gcsfilesystem
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.gcsfilesystem.html
    - language: go
      name: github.com/apache/beam/sdks/go/pkg/beam/io/filesystem/gcs
      url: https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam/io/filesystem/gcs
  - transform: LocalFileSystem
    description: "`FileSystem` implementation for accessing files on disk."
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.LocalFileSystemRegistrar
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/LocalFileSystemRegistrar.html
    - language: py
      name: apache_beam.io.localfilesystem
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.localfilesystem.html
    - language: go
      name: github.com/apache/beam/sdks/go/pkg/beam/io/filesystem/local
      url: https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam/io/filesystem/local
  - transform: S3FileSystem
    description: "`FileSystem` implementation for [Amazon S3](https://aws.amazon.com/s3/)."
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.aws.s3.S3FileSystemRegistrar
      # The URL path mirrors the class name, as for the other Java entries.
      # NOTE(review): confirm this Javadoc page resolves for the current release.
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/aws/s3/S3FileSystemRegistrar.html
  - transform: In-memory
    description: "`FileSystem` implementation in memory; useful for testing."
    implementations:
    - language: go
      name: github.com/apache/beam/sdks/go/pkg/beam/io/filesystem/memfs
      url: https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam/io/filesystem/memfs
# Connectors for messaging systems. A transform may list several
# implementations for the same language (for example two Java SDK variants).
- name: Messaging
  description: These I/O connectors typically involve working with unbounded sources that come from messaging sources.
  rows:
  - transform: KinesisIO
    description: PTransforms for reading from and writing to [Kinesis](https://aws.amazon.com/kinesis/) streams.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.kinesis.KinesisIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/kinesis/KinesisIO.html
  - transform: AmqpIO
    description: An IO for the AMQP 1.0 protocol using the Apache Qpid Proton-J library.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.amqp.AmqpIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/amqp/AmqpIO.html
  - transform: KafkaIO
    description: Read and Write PTransforms for [Apache Kafka](https://kafka.apache.org/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.kafka.KafkaIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/kafka/KafkaIO.html
    - language: py
      name: apache_beam.io.external.kafka
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.external.kafka.html
  - transform: PubSubIO
    description: Read and Write PTransforms for [Google Cloud Pub/Sub](https://cloud.google.com/pubsub) streams.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.gcp.pubsub.PubsubIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.html
    - language: py
      name: apache_beam.io.gcp.pubsub
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.pubsub.html
    - language: py
      name: apache_beam.io.external.gcp.pubsub
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.external.gcp.pubsub.html
    - language: go
      name: github.com/apache/beam/sdks/go/pkg/beam/io/pubsubio
      url: https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam/io/pubsubio
  - transform: JmsIO
    description: An unbounded source for [JMS](https://www.oracle.com/java/technologies/java-message-service.html) destinations (queues or topics).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.jms.JmsIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/jms/JmsIO.html
  - transform: MqttIO
    description: An unbounded source for an [MQTT](https://mqtt.org/) broker.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.mqtt.MqttIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/mqtt/MqttIO.html
  - transform: RabbitMqIO
    description: An IO to publish or consume messages with a RabbitMQ broker.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.rabbitmq.RabbitMqIO
      url: https://github.com/apache/beam/blob/master/sdks/java/io/rabbitmq/src/main/java/org/apache/beam/sdk/io/rabbitmq/RabbitMqIO.java
  - transform: SqsIO
    description: An unbounded source for [Amazon Simple Queue Service (SQS)](https://aws.amazon.com/sqs/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.aws.sqs.SqsIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/aws/sqs/SqsIO.html
    - language: java
      name: org.apache.beam.sdk.io.aws2.sqs.SqsIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/aws2/sqs/SqsIO.html
  - transform: SnsIO
    description: PTransforms for writing to [Amazon Simple Notification Service (SNS)](https://aws.amazon.com/sns/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.aws.sns.SnsIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/aws/sns/SnsIO.html
    - language: java
      name: org.apache.beam.sdk.io.aws2.sns.SnsIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/aws2/sns/SnsIO.html
# Connectors for database systems. Rows may carry an optional `docs` path in
# addition to the transform, description and implementations keys.
- name: Database
  description: These I/O connectors are used to connect to database systems.
  rows:
  - transform: CassandraIO
    description: An IO to read from [Apache Cassandra](https://cassandra.apache.org/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.cassandra.CassandraIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/cassandra/CassandraIO.html
  - transform: HadoopFormatIO
    description: Allows for reading data from any source or writing data to any sink which implements [Hadoop](https://hadoop.apache.org/) InputFormat or OutputFormat.
    docs: /documentation/io/built-in/hadoop/
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html
  - transform: HBaseIO
    description: A bounded source and sink for [HBase](https://hbase.apache.org/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.hbase.HBaseIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/hbase/HBaseIO.html
  - transform: HCatalogIO
    description: HCatalog source supports reading of HCatRecord from a [HCatalog](https://cwiki.apache.org/confluence/display/Hive/HCatalog)-managed source, for example [Hive](https://hive.apache.org/).
    docs: /documentation/io/built-in/hcatalog/
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.hcatalog.HCatalogIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/hcatalog/HCatalogIO.html
  - transform: KuduIO
    description: A bounded source and sink for [Kudu](https://kudu.apache.org/).
    implementations:
    - language: java
      # Fully qualified class name, matching the KuduIO.java file the URL targets.
      name: org.apache.beam.sdk.io.kudu.KuduIO
      url: https://github.com/apache/beam/blob/master/sdks/java/io/kudu/src/main/java/org/apache/beam/sdk/io/kudu/KuduIO.java
  - transform: SolrIO
    description: Transforms for reading and writing data from/to [Solr](https://lucene.apache.org/solr/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.solr.SolrIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/solr/SolrIO.html
  - transform: ElasticsearchIO
    description: Transforms for reading and writing data from/to [Elasticsearch](https://www.elastic.co/elasticsearch/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIO.html
  - transform: BigQueryIO
    description: Read from and write to [Google Cloud BigQuery](https://cloud.google.com/bigquery).
    docs: /documentation/io/built-in/google-bigquery/
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html
    - language: py
      name: apache_beam.io.gcp.bigquery
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.bigquery.html
    - language: go
      name: github.com/apache/beam/sdks/go/pkg/beam/io/bigqueryio
      url: https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam/io/bigqueryio
  - transform: BigTableIO
    description: Read from and write to [Google Cloud Bigtable](https://cloud.google.com/bigtable/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.gcp.bigtable.BigtableIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.html
    - language: py
      # Module path only, like the other Python entries.
      name: apache_beam.io.gcp.bigtableio
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.bigtableio.html
  - transform: DatastoreIO
    description: Read from and write to [Google Cloud Datastore](https://cloud.google.com/datastore).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.gcp.datastore.DatastoreIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/gcp/datastore/DatastoreIO.html
    - language: py
      name: apache_beam.io.gcp.datastore.v1new.datastoreio
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.gcp.datastore.v1new.datastoreio.html
  - transform: SnowflakeIO
    description: Experimental Transforms for reading from and writing to [Snowflake](https://www.snowflake.com/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.snowflake.SnowflakeIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/snowflake/SnowflakeIO.html
  - transform: SpannerIO
    description: Experimental Transforms for reading from and writing to [Google Cloud Spanner](https://cloud.google.com/spanner).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.gcp.spanner.SpannerIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.html
  - transform: JdbcIO
    description: IO to read and write data on [JDBC](https://docs.oracle.com/javase/tutorial/jdbc/basics/index.html).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.jdbc.JdbcIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/jdbc/JdbcIO.html
  - transform: MongoDbIO
    description: IO to read and write data on [MongoDB](https://www.mongodb.com/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.mongodb.MongoDbIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/mongodb/MongoDbIO.html
    - language: py
      name: apache_beam.io.mongodbio
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.mongodbio.html
  - transform: MongoDbGridFSIO
    description: IO to read and write data on [MongoDB GridFS](https://docs.mongodb.com/manual/core/gridfs/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.mongodb.MongoDbGridFSIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/mongodb/MongoDbGridFSIO.html
  - transform: RedisIO
    description: An IO to manipulate a [Redis](https://redis.io/) key/value database.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.redis.RedisIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/redis/RedisIO.html
  - transform: DynamoDBIO
    description: Read from and write to [Amazon DynamoDB](https://aws.amazon.com/dynamodb/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.aws.dynamodb.DynamoDBIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIO.html
    - language: java
      name: org.apache.beam.sdk.io.aws2.dynamodb.DynamoDBIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/aws2/dynamodb/DynamoDBIO.html
  - transform: ClickHouseIO
    description: Transform for writing to [ClickHouse](https://clickhouse.tech/).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.clickhouse.ClickHouseIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/clickhouse/ClickHouseIO.html
  - transform: DatabaseIO
    description: Package databaseio provides transformations and utilities to interact with a generic database / SQL API.
    implementations:
    - language: go
      name: github.com/apache/beam/sdks/go/pkg/beam/io/databaseio
      url: https://godoc.org/github.com/apache/beam/sdks/go/pkg/beam/io/databaseio
# Sources and sinks that do not belong to the other categories. Long
# descriptions use folded block scalars (`>-`), which join the wrapped lines
# with single spaces and drop the trailing newline.
- name: Miscellaneous
  description: Miscellaneous I/O sources.
  rows:
  - transform: FlinkStreamingImpulseSource
    description: >-
      A PTransform that provides an unbounded, streaming source of empty byte
      arrays. This can only be used with the Flink runner.
    implementations:
    - language: py
      name: apache_beam.io.flink.flink_streaming_impulse_source
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.flink.flink_streaming_impulse_source.html
  - transform: GenerateSequence
    description: Generates a bounded or unbounded stream of integers.
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.GenerateSequence
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/GenerateSequence.html
    - language: py
      name: apache_beam.io.external.generate_sequence.GenerateSequence
      url: https://beam.apache.org/releases/pydoc/current/apache_beam.io.external.generate_sequence.html
  - transform: SplunkIO
    description: >-
      A PTransform that provides an unbounded, streaming sink for Splunk's Http
      Event Collector (HEC).
    implementations:
    - language: java
      name: org.apache.beam.sdk.io.splunk.SplunkIO
      url: https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/splunk/SplunkIO.html